Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
      3 import java.util.Arrays;
      4 import java.util.Collections;
      5 import java.util.EnumMap;
      6 import java.util.HashMap;
      7 import java.util.HashSet;
      8 import java.util.Iterator;
      9 import java.util.LinkedHashMap;
     10 import java.util.LinkedHashSet;
     11 import java.util.List;
     12 import java.util.Locale;
     13 import java.util.Map;
     14 import java.util.Map.Entry;
     15 import java.util.Set;
     16 import java.util.TreeMap;
     17 import java.util.TreeSet;
     18 import java.util.regex.Matcher;
     19 import java.util.regex.Pattern;
     20 
     21 import org.unicode.cldr.draft.ScriptMetadata;
     22 import org.unicode.cldr.draft.ScriptMetadata.Info;
     23 import org.unicode.cldr.tool.LikelySubtags;
     24 import org.unicode.cldr.util.RegexLookup.Finder;
     25 import org.unicode.cldr.util.With.SimpleIterator;
     26 
     27 import com.google.common.base.Splitter;
     28 import com.ibm.icu.dev.util.CollectionUtilities;
     29 import com.ibm.icu.impl.Relation;
     30 import com.ibm.icu.impl.Row;
     31 import com.ibm.icu.lang.UCharacter;
     32 import com.ibm.icu.text.Collator;
     33 import com.ibm.icu.text.Transform;
     34 import com.ibm.icu.util.ICUException;
     35 import com.ibm.icu.util.Output;
     36 import com.ibm.icu.util.ULocale;
     37 
     38 /**
     39  * Provides a mechanism for dividing up LDML paths into understandable
     40  * categories, eg for the Survey tool.
     41  */
     42 public class PathHeader implements Comparable<PathHeader> {
     43     /**
     44      * Link to a section. Commenting out the page switch for now.
     45      */
     46     public static final String SECTION_LINK = "<a " + /* "target='CLDR_ST-SECTION' "+*/"href='";
     47     static boolean UNIFORM_CONTINENTS = true;
     48     static Factory factorySingleton = null;
     49 
     50     static final boolean SKIP_ORIGINAL_PATH = true;
     51 
     52     /**
     53      * What status the survey tool should use. Can be overridden in
     54      * Phase.getAction()
     55      */
     56     public enum SurveyToolStatus {
     57         /**
     58          * Never show.
     59          */
     60         DEPRECATED,
     61         /**
     62          * Hide. Can be overridden in Phase.getAction()
     63          */
     64         HIDE,
     65         /**
     66          * Don't allow Change box (except TC), instead show ticket. But allow
     67          * votes. Can be overridden in Phase.getAction()
     68          */
     69         READ_ONLY,
     70         /**
     71          * Allow change box and votes. Can be overridden in Phase.getAction()
     72          */
     73         READ_WRITE,
     74         /**
     75          * Changes are allowed as READ_WRITE, but field is always displayed as
     76          * LTR, even in RTL locales (used for patterns).
     77          */
     78         LTR_ALWAYS
     79     }
     80 
     81     private static EnumNames<SectionId> SectionIdNames = new EnumNames<SectionId>();
     82 
     83     /**
     84      * The Section for a path. Don't change these without committee buy-in. The
     85      * 'name' may be 'Core_Data' and the toString is 'Core Data' toString gives
     86      * the human name
     87      */
     88     public enum SectionId {
     89         Core_Data("Core Data"), Locale_Display_Names("Locale Display Names"), DateTime("Date & Time"), Timezones, Numbers, Currencies, Units, Characters, Misc(
     90             "Miscellaneous"), BCP47, Supplemental, Special;
     91 
     92         private SectionId(String... alternateNames) {
     93             SectionIdNames.add(this, alternateNames);
     94         }
     95 
     96         public static SectionId forString(String name) {
     97             return SectionIdNames.forString(name);
     98         }
     99 
    100         public String toString() {
    101             return SectionIdNames.toString(this);
    102         }
    103     }
    104 
    105     private static EnumNames<PageId> PageIdNames = new EnumNames<PageId>();
    106     private static Relation<SectionId, PageId> SectionIdToPageIds = Relation.of(new TreeMap<SectionId, Set<PageId>>(),
    107         TreeSet.class);
    108 
    109     private static class SubstringOrder implements Comparable<SubstringOrder> {
    110         final String mainOrder;
    111         final int order;
    112 
    113         public SubstringOrder(String source) {
    114             int pos = source.lastIndexOf('-') + 1;
    115             int ordering = COUNTS.indexOf(source.substring(pos));
    116             // account for digits, and "some" future proofing.
    117             order = ordering < 0
    118                 ? source.charAt(pos)
    119                     : 0x10000 + ordering;
    120                 mainOrder = source.substring(0, pos);
    121         }
    122 
    123         @Override
    124         public String toString() {
    125             return "{" + mainOrder + ", " + order + "}";
    126         }
    127 
    128         @Override
    129         public int compareTo(SubstringOrder other) {
    130             int diff = alphabeticCompare(mainOrder, other.mainOrder);
    131             if (diff != 0) {
    132                 return diff;
    133             }
    134             return order - other.order;
    135         }
    136     }
    137 
    138     /**
    139      * The Page for a path (within a Section). Don't change these without
    140      * committee buy-in. the name is for example WAsia where toString gives
    141      * Western Asia
    142      */
    143     public enum PageId {
    144         Alphabetic_Information(SectionId.Core_Data, "Alphabetic Information"), Numbering_Systems(SectionId.Core_Data,
    145             "Numbering Systems"), Locale_Name_Patterns(SectionId.Locale_Display_Names, "Locale Name Patterns"), Languages_A_D(SectionId.Locale_Display_Names,
    146                 "Languages (A-D)"), Languages_E_J(SectionId.Locale_Display_Names, "Languages (E-J)"), Languages_K_N(SectionId.Locale_Display_Names,
    147                     "Languages (K-N)"), Languages_O_S(SectionId.Locale_Display_Names, "Languages (O-S)"), Languages_T_Z(SectionId.Locale_Display_Names,
    148                         "Languages (T-Z)"), Scripts(SectionId.Locale_Display_Names), Territories(SectionId.Locale_Display_Names,
    149                             "Geographic Regions"), T_NAmerica(SectionId.Locale_Display_Names, "Territories (North America)"), T_SAmerica(
    150                                 SectionId.Locale_Display_Names,
    151                                 "Territories (South America)"), T_Africa(SectionId.Locale_Display_Names, "Territories (Africa)"), T_Europe(
    152                                     SectionId.Locale_Display_Names,
    153                                     "Territories (Europe)"), T_Asia(SectionId.Locale_Display_Names, "Territories (Asia)"), T_Oceania(
    154                                         SectionId.Locale_Display_Names,
    155                                         "Territories (Oceania)"), Locale_Variants(SectionId.Locale_Display_Names, "Locale Variants"), Keys(
    156                                             SectionId.Locale_Display_Names), Fields(SectionId.DateTime), Gregorian(SectionId.DateTime), Generic(
    157                                                 SectionId.DateTime), Buddhist(SectionId.DateTime), Chinese(SectionId.DateTime), Coptic(
    158                                                     SectionId.DateTime), Dangi(SectionId.DateTime), Ethiopic(SectionId.DateTime), Ethiopic_Amete_Alem(
    159                                                         SectionId.DateTime, "Ethiopic-Amete-Alem"), Hebrew(SectionId.DateTime), Indian(
    160                                                             SectionId.DateTime), Islamic(SectionId.DateTime), Japanese(SectionId.DateTime), Persian(
    161                                                                 SectionId.DateTime), Minguo(SectionId.DateTime), Timezone_Display_Patterns(SectionId.Timezones,
    162                                                                     "Timezone Display Patterns"), NAmerica(SectionId.Timezones, "North America"), SAmerica(
    163                                                                         SectionId.Timezones, "South America"), Africa(SectionId.Timezones), Europe(
    164                                                                             SectionId.Timezones), Russia(SectionId.Timezones), WAsia(SectionId.Timezones,
    165                                                                                 "Western Asia"), CAsia(SectionId.Timezones, "Central Asia"), EAsia(
    166                                                                                     SectionId.Timezones,
    167                                                                                     "Eastern Asia"), SAsia(SectionId.Timezones, "Southern Asia"), SEAsia(
    168                                                                                         SectionId.Timezones,
    169                                                                                         "Southeast Asia"), Australasia(SectionId.Timezones), Antarctica(
    170                                                                                             SectionId.Timezones), Oceania(SectionId.Timezones), UnknownT(
    171                                                                                                 SectionId.Timezones,
    172                                                                                                 "Unknown Region"), Overrides(SectionId.Timezones), Symbols(
    173                                                                                                     SectionId.Numbers), MinimalPairs(SectionId.Numbers,
    174                                                                                                         "Minimal Pairs"), Number_Formatting_Patterns(
    175                                                                                                             SectionId.Numbers,
    176                                                                                                             "Number Formatting Patterns"), Compact_Decimal_Formatting(
    177                                                                                                                 SectionId.Numbers,
    178                                                                                                                 "Compact Decimal Formatting"), Compact_Decimal_Formatting_Other(
    179                                                                                                                     SectionId.Numbers,
    180                                                                                                                     "Compact Decimal Formatting (Other Numbering Systems)"), Measurement_Systems(
    181                                                                                                                         SectionId.Units,
    182                                                                                                                         "Measurement Systems"), Duration(
    183                                                                                                                             SectionId.Units), Length(
    184                                                                                                                                 SectionId.Units), Area(
    185                                                                                                                                     SectionId.Units), Volume(
    186                                                                                                                                         SectionId.Units), SpeedAcceleration(
    187                                                                                                                                             SectionId.Units,
    188                                                                                                                                             "Speed and Acceleration"), MassWeight(
    189                                                                                                                                                 SectionId.Units,
    190                                                                                                                                                 "Mass and Weight"), EnergyPower(
    191                                                                                                                                                     SectionId.Units,
    192                                                                                                                                                     "Energy and Power"), ElectricalFrequency(
    193                                                                                                                                                         SectionId.Units,
    194                                                                                                                                                         "Electrical and Frequency"), Weather(
    195                                                                                                                                                             SectionId.Units), Digital(
    196                                                                                                                                                                 SectionId.Units), Coordinates(
    197                                                                                                                                                                     SectionId.Units), OtherUnits(
    198                                                                                                                                                                         SectionId.Units,
    199                                                                                                                                                                         "Other Units"), CompoundUnits(
    200                                                                                                                                                                             SectionId.Units,
    201                                                                                                                                                                             "Compound Units"), Displaying_Lists(
    202                                                                                                                                                                                 SectionId.Misc,
    203                                                                                                                                                                                 "Displaying Lists"), LinguisticElements(
    204                                                                                                                                                                                     SectionId.Misc,
    205                                                                                                                                                                                     "Linguistic Elements"), Transforms(
    206                                                                                                                                                                                         SectionId.Misc), Identity(
    207                                                                                                                                                                                             SectionId.Special), Version(
    208                                                                                                                                                                                                 SectionId.Special), Suppress(
    209                                                                                                                                                                                                     SectionId.Special), Deprecated(
    210                                                                                                                                                                                                         SectionId.Special), Unknown(
    211                                                                                                                                                                                                             SectionId.Special), C_NAmerica(
    212                                                                                                                                                                                                                 SectionId.Currencies,
    213                                                                                                                                                                                                                 "North America (C)"), //need to add (C) to differentiate from Timezone territories
    214         C_SAmerica(SectionId.Currencies, "South America (C)"), C_NWEurope(SectionId.Currencies, "Northern/Western Europe"), C_SEEurope(SectionId.Currencies,
    215             "Southern/Eastern Europe"), C_NAfrica(SectionId.Currencies, "Northern Africa"), C_WAfrica(SectionId.Currencies, "Western Africa"), C_MAfrica(
    216                 SectionId.Currencies, "Middle Africa"), C_EAfrica(SectionId.Currencies, "Eastern Africa"), C_SAfrica(SectionId.Currencies,
    217                     "Southern Africa"), C_WAsia(SectionId.Currencies, "Western Asia (C)"), C_CAsia(SectionId.Currencies, "Central Asia (C)"), C_EAsia(
    218                         SectionId.Currencies, "Eastern Asia (C)"), C_SAsia(SectionId.Currencies, "Southern Asia (C)"), C_SEAsia(SectionId.Currencies,
    219                             "Southeast Asia (C)"), C_Oceania(SectionId.Currencies, "Oceania (C)"), C_Unknown(SectionId.Currencies, "Unknown Region (C)"),
    220         // BCP47
    221         u_Extension(SectionId.BCP47), t_Extension(SectionId.BCP47),
    222         // Supplemental
    223         Alias(SectionId.Supplemental), IdValidity(SectionId.Supplemental), Locale(SectionId.Supplemental), RegionMapping(SectionId.Supplemental), WZoneMapping(
    224             SectionId.Supplemental), Transform(SectionId.Supplemental), UnitPreferences(SectionId.Supplemental), Likely(SectionId.Supplemental), LanguageMatch(
    225                 SectionId.Supplemental), TerritoryInfo(SectionId.Supplemental), LanguageInfo(SectionId.Supplemental), LanguageGroup(
    226                     SectionId.Supplemental), Fallback(SectionId.Supplemental), Gender(SectionId.Supplemental), Metazone(SectionId.Supplemental), NumberSystem(
    227                         SectionId.Supplemental), Plural(SectionId.Supplemental), PluralRange(SectionId.Supplemental), Containment(
    228                             SectionId.Supplemental), Currency(SectionId.Supplemental), Calendar(SectionId.Supplemental), WeekData(
    229                                 SectionId.Supplemental), Measurement(SectionId.Supplemental), Language(SectionId.Supplemental), RBNF(
    230                                     SectionId.Supplemental), Segmentation(SectionId.Supplemental), DayPeriod(SectionId.Supplemental),
    231 
    232         Category(SectionId.Characters),
    233         // [Smileys, People, Animals & Nature, Food & Drink, Travel & Places, Activities, Objects, Symbols, Flags]
    234         Smileys(SectionId.Characters), People(SectionId.Characters), Animals_Nature(SectionId.Characters, "Animals & Nature"), Food_Drink(SectionId.Characters,
    235             "Food & Drink"), Travel_Places(SectionId.Characters, "Travel & Places"), Activities(SectionId.Characters), Objects(
    236                 SectionId.Characters), Symbols2(SectionId.Characters), Flags(SectionId.Characters), Component(SectionId.Characters),
    237 
    238         Typography(SectionId.Characters),
    239         ;
    240 
    241         private final SectionId sectionId;
    242 
    243         private PageId(SectionId sectionId, String... alternateNames) {
    244             this.sectionId = sectionId;
    245             SectionIdToPageIds.put(sectionId, this);
    246             PageIdNames.add(this, alternateNames);
    247         }
    248 
    249         /**
    250          * Construct a pageId given a string
    251          *
    252          * @param name
    253          * @return
    254          */
    255         public static PageId forString(String name) {
    256             try {
    257                 return PageIdNames.forString(name);
    258             } catch (Exception e) {
    259                 throw new ICUException("No PageId for " + name, e);
    260             }
    261         }
    262 
    263         /**
    264          * Returns the page id
    265          *
    266          * @return a page ID, such as 'Languages'
    267          */
    268         public String toString() {
    269             return PageIdNames.toString(this);
    270         }
    271 
    272         /**
    273          * Get the containing section id, such as 'Code Lists'
    274          *
    275          * @return the containing section ID
    276          */
    277         public SectionId getSectionId() {
    278             return sectionId;
    279         }
    280     }
    281 
    282     private final SectionId sectionId;
    283     private final PageId pageId;
    284     private final String header;
    285     private final String code;
    286     private final String originalPath;
    287     private final SurveyToolStatus status;
    288 
    289     // Used for ordering
    290     private final int headerOrder;
    291     private final int codeOrder;
    292     private final SubstringOrder codeSuborder;
    293 
    294     static final Pattern SEMI = PatternCache.get("\\s*;\\s*");
    295     static final Matcher ALT_MATCHER = PatternCache.get(
    296         "\\[@alt=\"([^\"]*+)\"]")
    297         .matcher("");
    298 
    299     static final Collator alphabetic = CLDRConfig.getInstance().getCollatorRoot();
    300 
    301 //    static final RuleBasedCollator alphabetic = (RuleBasedCollator) Collator
    302 //            .getInstance(ULocale.ENGLISH);
    303 //    static {
    304 //        alphabetic.setNumericCollation(true);
    305 //        alphabetic.freeze();
    306 //    }
    307 
    308     static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
    309     static final Map<String, String> metazoneToContinent = supplementalDataInfo
    310         .getMetazoneToContinentMap();
    311     static final StandardCodes standardCode = StandardCodes.make();
    312     static final Map<String, String> metazoneToPageTerritory = new HashMap<String, String>();
    313     static {
    314         Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone();
    315         for (Entry<String, Map<String, String>> metazoneEntry : metazoneToRegionToZone.entrySet()) {
    316             String metazone = metazoneEntry.getKey();
    317             String worldZone = metazoneEntry.getValue().get("001");
    318             String territory = Containment.getRegionFromZone(worldZone);
    319             if (territory == null) {
    320                 territory = "ZZ";
    321             }
    322             // Russia, Antarctica => territory
    323             // in Australasia, Asia, S. America => subcontinent
    324             // in N. America => N. America (grouping of 3 subcontinents)
    325             // in everything else => continent
    326             if (territory.equals("RU") || territory.equals("AQ")) {
    327                 metazoneToPageTerritory.put(metazone, territory);
    328             } else {
    329                 String continent = Containment.getContinent(territory);
    330                 String subcontinent = Containment.getSubcontinent(territory);
    331                 if (continent.equals("142")) { // Asia
    332                     metazoneToPageTerritory.put(metazone, subcontinent);
    333                 } else if (continent.equals("019")) { // Americas
    334                     metazoneToPageTerritory.put(metazone, subcontinent.equals("005") ? subcontinent : "003");
    335                 } else if (subcontinent.equals("053")) { // Australasia
    336                     metazoneToPageTerritory.put(metazone, subcontinent);
    337                 } else {
    338                     metazoneToPageTerritory.put(metazone, continent);
    339                 }
    340             }
    341         }
    342     }
    343 
    /**
     * Creates a header; every argument is stored as given, without validation.
     *
     * @param sectionId the section the path belongs to
     * @param pageId the page (within the section) the path belongs to
     * @param header the header text; may be null
     * @param headerOrder integer order of the header, compared before the header text
     * @param code the code text
     * @param codeOrder integer order of the code, compared before the suborder and code text
     * @param suborder secondary ordering for the code; may be null
     * @param status the survey tool status for the path
     * @param originalPath the path this header was created for
     */
    private PathHeader(SectionId sectionId, PageId pageId, String header,
        int headerOrder, String code, int codeOrder, SubstringOrder suborder, SurveyToolStatus status,
        String originalPath) {
        // where the path is displayed
        this.sectionId = sectionId;
        this.pageId = pageId;
        this.header = header;
        this.code = code;
        // how it is ordered
        this.headerOrder = headerOrder;
        this.codeOrder = codeOrder;
        this.codeSuborder = suborder;
        // extra data
        this.status = status;
        this.originalPath = originalPath;
    }
    369 
    370     /**
    371      * Return a factory for use in creating the headers. This should be cached.
    372      * The calls are thread-safe. The englishFile sets a static for now; after
    373      * the first time, null can be passed.
    374      *
    375      * @param englishFile
    376      */
    377     public static Factory getFactory(CLDRFile englishFile) {
    378         if (factorySingleton == null) {
    379             if (englishFile == null) {
    380                 throw new IllegalArgumentException("English CLDRFile must not be null");
    381             }
    382             if (!englishFile.getLocaleID().equals(ULocale.ENGLISH.getBaseName())) {
    383                 throw new IllegalArgumentException("PathHeader's CLDRFile must be '" +
    384                     ULocale.ENGLISH.getBaseName() + "', but found '" + englishFile.getLocaleID() + "'");
    385             }
    386             factorySingleton = new Factory(englishFile);
    387         }
    388         return factorySingleton;
    389     }
    390 
    391     /**
    392      * @deprecated
    393      */
    394     public String getSection() {
    395         return sectionId.toString();
    396     }
    397 
    398     public SectionId getSectionId() {
    399         return sectionId;
    400     }
    401 
    402     /**
    403      * @deprecated
    404      */
    405     public String getPage() {
    406         return pageId.toString();
    407     }
    408 
    409     public PageId getPageId() {
    410         return pageId;
    411     }
    412 
    413     public String getHeader() {
    414         return header == null ? "" : header;
    415     }
    416 
    417     public String getCode() {
    418         return code;
    419     }
    420 
    421     public String getHeaderCode() {
    422         return getHeader() + ": " + getCode();
    423     }
    424 
    425     public String getOriginalPath() {
    426         return originalPath;
    427     }
    428 
    429     public SurveyToolStatus getSurveyToolStatus() {
    430         return status;
    431     }
    432 
    433     @Override
    434     public String toString() {
    435         return sectionId
    436             + "\t" + pageId
    437             + "\t" + header // + "\t" + headerOrder
    438             + "\t" + code // + "\t" + codeOrder
    439             ;
    440     }
    441 
    442     @Override
    443     public int compareTo(PathHeader other) {
    444         // Within each section, order alphabetically if the integer orders are
    445         // not different.
    446         try {
    447             int result;
    448             if (0 != (result = sectionId.compareTo(other.sectionId))) {
    449                 return result;
    450             }
    451             if (0 != (result = pageId.compareTo(other.pageId))) {
    452                 return result;
    453             }
    454             if (0 != (result = headerOrder - other.headerOrder)) {
    455                 return result;
    456             }
    457             if (0 != (result = alphabeticCompare(header, other.header))) {
    458                 return result;
    459             }
    460             if (0 != (result = codeOrder - other.codeOrder)) {
    461                 return result;
    462             }
    463             if (codeSuborder != null) { // do all three cases, for transitivity
    464                 if (other.codeSuborder != null) {
    465                     if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) {
    466                         return result;
    467                     }
    468                 } else {
    469                     return 1; // if codeSuborder != null (and other.codeSuborder
    470                     // == null), it is greater
    471                 }
    472             } else if (other.codeSuborder != null) {
    473                 return -1; // if codeSuborder == null (and other.codeSuborder !=
    474                 // null), it is greater
    475             }
    476             if (0 != (result = alphabeticCompare(code, other.code))) {
    477                 return result;
    478             }
    479             if (!SKIP_ORIGINAL_PATH && 0 != (result = alphabeticCompare(originalPath, other.originalPath))) {
    480                 return result;
    481             }
    482             return 0;
    483         } catch (RuntimeException e) {
    484             throw new IllegalArgumentException("Internal problem comparing " + this + " and " + other, e);
    485         }
    486     }
    487 
    488     public int compareHeader(PathHeader other) {
    489         int result;
    490         if (0 != (result = headerOrder - other.headerOrder)) {
    491             return result;
    492         }
    493         if (0 != (result = alphabeticCompare(header, other.header))) {
    494             return result;
    495         }
    496         return result;
    497     }
    498 
    499     public int compareCode(PathHeader other) {
    500         int result;
    501         if (0 != (result = codeOrder - other.codeOrder)) {
    502             return result;
    503         }
    504         if (codeSuborder != null) { // do all three cases, for transitivity
    505             if (other.codeSuborder != null) {
    506                 if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) {
    507                     return result;
    508                 }
    509             } else {
    510                 return 1; // if codeSuborder != null (and other.codeSuborder
    511                 // == null), it is greater
    512             }
    513         } else if (other.codeSuborder != null) {
    514             return -1; // if codeSuborder == null (and other.codeSuborder !=
    515             // null), it is greater
    516         }
    517         if (0 != (result = alphabeticCompare(code, other.code))) {
    518             return result;
    519         }
    520         return result;
    521     }
    522 
    523     @Override
    524     public boolean equals(Object obj) {
    525         PathHeader other;
    526         try {
    527             other = (PathHeader) obj;
    528         } catch (Exception e) {
    529             return false;
    530         }
    531         return sectionId == other.sectionId && pageId == other.pageId
    532             && header.equals(other.header) && code.equals(other.code);
    533     }
    534 
    535     @Override
    536     public int hashCode() {
    537         return sectionId.hashCode() ^ pageId.hashCode() ^ header.hashCode() ^ code.hashCode();
    538     }
    539 
    540     public static class Factory implements Transform<String, PathHeader> {
    541         static final RegexLookup<RawData> lookup = RegexLookup
    542             .of(new PathHeaderTransform())
    543             .setPatternTransform(
    544                 RegexLookup.RegexFinderTransformPath)
    545             .loadFromFile(
    546                 PathHeader.class,
    547                 "data/PathHeader.txt");
    548         // synchronized with lookup
    549         static final Output<String[]> args = new Output<String[]>();
    550         // synchronized with lookup
    551         static final Counter<RawData> counter = new Counter<RawData>();
    552         // synchronized with lookup
    553         static final Map<RawData, String> samples = new HashMap<RawData, String>();
    554         // synchronized with lookup
    555         static int order;
    556         static SubstringOrder suborder;
    557 
    558         static final Map<String, PathHeader> cache = new HashMap<String, PathHeader>();
    559         // synchronized with cache
    560         static final Map<SectionId, Map<PageId, SectionPage>> sectionToPageToSectionPage = new EnumMap<SectionId, Map<PageId, SectionPage>>(
    561             SectionId.class);
    562         static final Relation<SectionPage, String> sectionPageToPaths = Relation
    563             .of(new TreeMap<SectionPage, Set<String>>(),
    564                 HashSet.class);
    565         private static CLDRFile englishFile;
    566         private Set<String> matchersFound = new HashSet<String>();
    567 
    568         /**
    569          * Create a factory for creating PathHeaders.
    570          *
    571          * @param englishFile
    572          *            - only sets the file (statically!) if not already set.
    573          */
    574         private Factory(CLDRFile englishFile) {
    575             setEnglishCLDRFileIfNotSet(englishFile); // temporary
    576         }
    577 
    578         /**
    579          * Returns true if we set it, false if set before.
    580          *
    581          * @param englishFile2
    582          * @return
    583          */
    584         private static boolean setEnglishCLDRFileIfNotSet(CLDRFile englishFile2) {
    585             synchronized (Factory.class) {
    586                 if (englishFile != null) {
    587                     return false;
    588                 }
    589                 englishFile = englishFile2;
    590                 return true;
    591             }
    592         }
    593 
    594         /**
    595          * Use only when trying to find unmatched patterns
    596          */
    597         public void clearCache() {
    598             synchronized (cache) {
    599                 cache.clear();
    600             }
    601         }
    602 
    603         /**
    604          * Return the PathHeader for a given path. Thread-safe.
    605          */
    606         public PathHeader fromPath(String path) {
    607             return fromPath(path, null);
    608         }
    609 
    610         /**
    611          * Return the PathHeader for a given path. Thread-safe.
    612          */
    613         public PathHeader transform(String path) {
    614             return fromPath(path, null);
    615         }
    616 
    617         /**
    618          * Return the PathHeader for a given path. Thread-safe.
    619          * @param failures a list of failures to add to.
    620          */
    621         public PathHeader fromPath(String path, List<String> failures) {
    622             if (path == null) {
    623                 throw new NullPointerException("Path cannot be null");
    624             }
    625             synchronized (cache) {
    626                 PathHeader old = cache.get(path);
    627                 if (old != null) {
    628                     return old;
    629                 }
    630             }
    631             synchronized (lookup) {
    632                 String cleanPath = path;
    633                 // special handling for alt
    634                 String alt = null;
    635                 int altPos = cleanPath.indexOf("[@alt=");
    636                 if (altPos >= 0 && !cleanPath.endsWith("/symbol[@alt=\"narrow\"]")) {
    637                     if (ALT_MATCHER.reset(cleanPath).find()) {
    638                         alt = ALT_MATCHER.group(1);
    639                         cleanPath = cleanPath.substring(0, ALT_MATCHER.start())
    640                             + cleanPath.substring(ALT_MATCHER.end());
    641                         int pos = alt.indexOf("proposed");
    642                         if (pos >= 0 && !path.startsWith("//ldml/collations")) {
    643                             alt = pos == 0 ? null : alt.substring(0, pos - 1);
    644                             // drop "proposed",
    645                             // change "xxx-proposed" to xxx.
    646                         }
    647                     } else {
    648                         throw new IllegalArgumentException();
    649                     }
    650                 }
    651                 Output<Finder> matcherFound = new Output<Finder>();
    652                 RawData data = lookup.get(cleanPath, null, args, matcherFound, failures);
    653                 if (data == null) {
    654                     return null;
    655                 }
    656                 matchersFound.add(matcherFound.value.toString());
    657                 counter.add(data, 1);
    658                 if (!samples.containsKey(data)) {
    659                     samples.put(data, cleanPath);
    660                 }
    661                 try {
    662                     PathHeader result = new PathHeader(
    663                         SectionId.forString(fix(data.section, 0)),
    664                         PageId.forString(fix(data.page, 0)),
    665                         fix(data.header, data.headerOrder),
    666                         order, // only valid after call to fix. TODO, make
    667                         // this cleaner
    668                         fix(data.code + (alt == null ? "" : ("-" + alt)), data.codeOrder),
    669                         order, // only valid after call to fix
    670                         suborder,
    671                         data.status,
    672                         path);
    673                     synchronized (cache) {
    674                         PathHeader old = cache.get(path);
    675                         if (old == null) {
    676                             cache.put(path, result);
    677                         } else {
    678                             result = old;
    679                         }
    680                         Map<PageId, SectionPage> pageToPathHeaders = sectionToPageToSectionPage
    681                             .get(result.sectionId);
    682                         if (pageToPathHeaders == null) {
    683                             sectionToPageToSectionPage.put(result.sectionId, pageToPathHeaders = new EnumMap<PageId, SectionPage>(PageId.class));
    684                         }
    685                         SectionPage sectionPage = pageToPathHeaders.get(result.pageId);
    686                         if (sectionPage == null) {
    687                             sectionPage = new SectionPage(result.sectionId, result.pageId);
    688                             pageToPathHeaders.put(result.pageId, sectionPage);
    689                         }
    690                         sectionPageToPaths.put(sectionPage, path);
    691                     }
    692                     return result;
    693                 } catch (Exception e) {
    694                     throw new IllegalArgumentException(
    695                         "Probably mismatch in Page/Section enum, or too few capturing groups in regex for " + cleanPath,
    696                         e);
    697                 }
    698             }
    699         }
    700 
    701         private static class SectionPage implements Comparable<SectionPage> {
    702             private final SectionId sectionId;
    703             private final PageId pageId;
    704 
    705             public SectionPage(SectionId sectionId, PageId pageId) {
    706                 this.sectionId = sectionId;
    707                 this.pageId = pageId;
    708             }
    709 
    710             @Override
    711             public int compareTo(SectionPage other) {
    712                 // Within each section, order alphabetically if the integer
    713                 // orders are
    714                 // not different.
    715                 int result;
    716                 if (0 != (result = sectionId.compareTo(other.sectionId))) {
    717                     return result;
    718                 }
    719                 if (0 != (result = pageId.compareTo(other.pageId))) {
    720                     return result;
    721                 }
    722                 return 0;
    723             }
    724 
    725             @Override
    726             public boolean equals(Object obj) {
    727                 PathHeader other;
    728                 try {
    729                     other = (PathHeader) obj;
    730                 } catch (Exception e) {
    731                     return false;
    732                 }
    733                 return sectionId == other.sectionId && pageId == other.pageId;
    734             }
    735 
    736             @Override
    737             public int hashCode() {
    738                 return sectionId.hashCode() ^ pageId.hashCode();
    739             }
    740         }
    741 
    742         /**
    743          * Returns a set of paths currently associated with the given section
    744          * and page.
    745          * <p>
    746          * <b>Warning:</b>
    747          * <ol>
    748          * <li>The set may not be complete for a cldrFile unless all of paths in
    749          * the file have had fromPath called. And this includes getExtraPaths().
    750          * </li>
    751          * <li>The set may include paths that have no value in the current
    752          * cldrFile.</li>
    753          * <li>The set may be empty, if the section/page aren't valid.</li>
    754          * </ol>
    755          * Thread-safe.
    756          *
    757          * @target a collection where the paths are to be returned.
    758          */
    759         public static Set<String> getCachedPaths(SectionId sectionId, PageId page) {
    760             Set<String> target = new HashSet<String>();
    761             synchronized (cache) {
    762                 Map<PageId, SectionPage> pageToSectionPage = sectionToPageToSectionPage
    763                     .get(sectionId);
    764                 if (pageToSectionPage == null) {
    765                     return target;
    766                 }
    767                 SectionPage sectionPage = pageToSectionPage.get(page);
    768                 if (sectionPage == null) {
    769                     return target;
    770                 }
    771                 Set<String> set = sectionPageToPaths.getAll(sectionPage);
    772                 target.addAll(set);
    773             }
    774             return target;
    775         }
    776 
    777         /**
    778          * Return the Sections and Pages that are in defined, for display in
    779          * menus. Both are ordered.
    780          */
    781         public static Relation<SectionId, PageId> getSectionIdsToPageIds() {
    782             SectionIdToPageIds.freeze(); // just in case
    783             return SectionIdToPageIds;
    784         }
    785 
    786         /**
    787          * Return paths that have the designated section and page.
    788          *
    789          * @param sectionId
    790          * @param pageId
    791          * @param file
    792          */
    793         public Iterable<String> filterCldr(SectionId sectionId, PageId pageId, CLDRFile file) {
    794             return new FilteredIterable(sectionId, pageId, file);
    795         }
    796 
    797         /**
    798          * Return the names for Sections and Pages that are defined, for display
    799          * in menus. Both are ordered.
    800          *
    801          * @deprecated Use getSectionIdsToPageIds
    802          */
    803         public static LinkedHashMap<String, Set<String>> getSectionsToPages() {
    804             LinkedHashMap<String, Set<String>> sectionsToPages = new LinkedHashMap<String, Set<String>>();
    805             for (PageId pageId : PageId.values()) {
    806                 String sectionId2 = pageId.getSectionId().toString();
    807                 Set<String> pages = sectionsToPages.get(sectionId2);
    808                 if (pages == null) {
    809                     sectionsToPages.put(sectionId2, pages = new LinkedHashSet<String>());
    810                 }
    811                 pages.add(pageId.toString());
    812             }
    813             return sectionsToPages;
    814         }
    815 
    816         /**
    817          * @deprecated, use the filterCldr with the section/page ids.
    818          */
    819         public Iterable<String> filterCldr(String section, String page, CLDRFile file) {
    820             return new FilteredIterable(section, page, file);
    821         }
    822 
    823         private class FilteredIterable implements Iterable<String>, SimpleIterator<String> {
    824             private final SectionId sectionId;
    825             private final PageId pageId;
    826             private final Iterator<String> fileIterator;
    827 
    828             FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file) {
    829                 this.sectionId = sectionId;
    830                 this.pageId = pageId;
    831                 this.fileIterator = file.fullIterable().iterator();
    832             }
    833 
    834             public FilteredIterable(String section, String page, CLDRFile file) {
    835                 this(SectionId.forString(section), PageId.forString(page), file);
    836             }
    837 
    838             @Override
    839             public Iterator<String> iterator() {
    840                 return With.toIterator(this);
    841             }
    842 
    843             @Override
    844             public String next() {
    845                 while (fileIterator.hasNext()) {
    846                     String path = fileIterator.next();
    847                     PathHeader pathHeader = fromPath(path);
    848                     if (sectionId == pathHeader.sectionId && pageId == pathHeader.pageId) {
    849                         return path;
    850                     }
    851                 }
    852                 return null;
    853             }
    854         }
    855 
    856         private static class ChronologicalOrder {
    857             private Map<String, Integer> map = new HashMap<String, Integer>();
    858             private String item;
    859             private int order;
    860             private ChronologicalOrder toClear;
    861 
    862             ChronologicalOrder(ChronologicalOrder toClear) {
    863                 this.toClear = toClear;
    864             }
    865 
    866             int getOrder() {
    867                 return order;
    868             }
    869 
    870             public String set(String itemToOrder) {
    871                 if (itemToOrder.startsWith("*")) {
    872                     item = itemToOrder.substring(1, itemToOrder.length());
    873                     return item; // keep old order
    874                 }
    875                 item = itemToOrder;
    876                 Integer old = map.get(item);
    877                 if (old != null) {
    878                     order = old.intValue();
    879                 } else {
    880                     order = map.size();
    881                     map.put(item, order);
    882                     clearLower();
    883                 }
    884                 return item;
    885             }
    886 
    887             private void clearLower() {
    888                 if (toClear != null) {
    889                     toClear.map.clear();
    890                     toClear.order = 0;
    891                     toClear.clearLower();
    892                 }
    893             }
    894         }
    895 
    896         static class RawData {
    897             static ChronologicalOrder codeOrdering = new ChronologicalOrder(null);
    898             static ChronologicalOrder headerOrdering = new ChronologicalOrder(codeOrdering);
    899 
    900             public RawData(String source) {
    901                 String[] split = SEMI.split(source);
    902                 section = split[0];
    903                 // HACK
    904                 if (section.equals("Timezones") && split[1].equals("Indian")) {
    905                     page = "Indian2";
    906                 } else {
    907                     page = split[1];
    908                 }
    909 
    910                 header = headerOrdering.set(split[2]);
    911                 headerOrder = headerOrdering.getOrder();
    912 
    913                 code = codeOrdering.set(split[3]);
    914                 codeOrder = codeOrdering.getOrder();
    915 
    916                 status = split.length < 5 ? SurveyToolStatus.READ_WRITE : SurveyToolStatus.valueOf(split[4]);
    917             }
    918 
    919             public final String section;
    920             public final String page;
    921             public final String header;
    922             public final int headerOrder;
    923             public final String code;
    924             public final int codeOrder;
    925             public final SurveyToolStatus status;
    926 
    927             @Override
    928             public String toString() {
    929                 return section + "\t"
    930                     + page + "\t"
    931                     + header + "\t" + headerOrder + "\t"
    932                     + code + "\t" + codeOrder + "\t"
    933                     + status;
    934             }
    935         }
    936 
    937         static class PathHeaderTransform implements Transform<String, RawData> {
    938             @Override
    939             public RawData transform(String source) {
    940                 return new RawData(source);
    941             }
    942         }
    943 
    944         /**
    945          * Internal data, for testing and debugging.
    946          *
    947          * @deprecated
    948          */
    949         public class CounterData extends Row.R4<String, RawData, String, String> {
    950             public CounterData(String a, RawData b, String c) {
    951                 super(a, b, c == null ? "no sample" : c, c == null ? "no sample" : fromPath(c)
    952                     .toString());
    953             }
    954         }
    955 
    956         /**
    957          * Get the internal data, for testing and debugging.
    958          *
    959          * @deprecated
    960          */
    961         public Counter<CounterData> getInternalCounter() {
    962             synchronized (lookup) {
    963                 Counter<CounterData> result = new Counter<CounterData>();
    964                 for (Map.Entry<Finder, RawData> foo : lookup) {
    965                     Finder finder = foo.getKey();
    966                     RawData data = foo.getValue();
    967                     long count = counter.get(data);
    968                     result.add(new CounterData(finder.toString(), data, samples.get(data)), count);
    969                 }
    970                 return result;
    971             }
    972         }
    973 
    974         static Map<String, Transform<String, String>> functionMap = new HashMap<String, Transform<String, String>>();
    975         static String[] months = { "Jan", "Feb", "Mar",
    976             "Apr", "May", "Jun",
    977             "Jul", "Aug", "Sep",
    978             "Oct", "Nov", "Dec",
    979         "Und" };
    980         static List<String> days = Arrays.asList("sun", "mon",
    981             "tue", "wed", "thu",
    982             "fri", "sat");
    983         static List<String> unitOrder = DtdData.unitOrder.getOrder();
    984         static final MapComparator<String> dayPeriods = new MapComparator<String>().add(
    985             "am", "pm", "midnight", "noon",
    986             "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2").freeze();
    987         // static Map<String, String> likelySubtags =
    988         // supplementalDataInfo.getLikelySubtags();
    989         static LikelySubtags likelySubtags = new LikelySubtags();
    990         static HyphenSplitter hyphenSplitter = new HyphenSplitter();
    991         static Transform<String, String> catFromTerritory;
    992         static Transform<String, String> catFromTimezone;
    993         static {
    994             // Put any new functions used in PathHeader.txt in here.
    995             // To change the order of items within a section or heading, set
    996             // order/suborder to be the relative position of the current item.
    997             functionMap.put("month", new Transform<String, String>() {
    998                 public String transform(String source) {
    999                     int m = Integer.parseInt(source);
   1000                     order = m;
   1001                     return months[m - 1];
   1002                 }
   1003             });
   1004             functionMap.put("count", new Transform<String, String>() {
   1005                 public String transform(String source) {
   1006                     suborder = new SubstringOrder(source);
   1007                     return source;
   1008                 }
   1009             });
   1010             functionMap.put("count2", new Transform<String, String>() {
   1011                 public String transform(String source) {
   1012                     int pos = source.indexOf('-');
   1013                     source = pos + source.substring(pos);
   1014                     suborder = new SubstringOrder(source); // make 10000-...
   1015                     // into 5-
   1016                     return source;
   1017                 }
   1018             });
   1019             functionMap.put("currencySymbol", new Transform<String, String>() {
   1020                 public String transform(String source) {
   1021                     order = 901;
   1022                     if (source.endsWith("narrow")) {
   1023                         order = 902;
   1024                     }
   1025                     if (source.endsWith("variant")) {
   1026                         order = 903;
   1027                     }
   1028                     return source;
   1029                 }
   1030             });
   1031             functionMap.put("unitCount", new Transform<String, String>() {
   1032                 public String transform(String source) {
   1033                     String[] unitLengths = { "long", "short", "narrow" };
   1034                     int pos = 9;
   1035                     for (int i = 0; i < unitLengths.length; i++) {
   1036                         if (source.startsWith(unitLengths[i])) {
   1037                             pos = i;
   1038                             continue;
   1039                         }
   1040                     }
   1041                     order = pos;
   1042                     suborder = new SubstringOrder(pos + "-" + source); //
   1043                     return source;
   1044                 }
   1045             });
   1046             functionMap.put("day", new Transform<String, String>() {
   1047                 public String transform(String source) {
   1048                     int m = days.indexOf(source);
   1049                     order = m;
   1050                     return source;
   1051                 }
   1052             });
   1053             functionMap.put("dayPeriod", new Transform<String, String>() {
   1054                 public String transform(String source) {
   1055                     try {
   1056                         order = dayPeriods.getNumericOrder(source);
   1057                     } catch (Exception e) {
   1058                         // if an old item is tried, like "evening", this will fail.
   1059                         // so that old data still works, hack this.
   1060                         order = Math.abs(source.hashCode() << 16);
   1061                     }
   1062                     return source;
   1063                 }
   1064             });
   1065             functionMap.put("calendar", new Transform<String, String>() {
   1066                 Map<String, String> fixNames = Builder.with(new HashMap<String, String>())
   1067                     .put("islamicc", "Islamic Civil")
   1068                     .put("roc", "Minguo")
   1069                     .put("Ethioaa", "Ethiopic Amete Alem")
   1070                     .put("Gregory", "Gregorian")
   1071                     .put("iso8601", "ISO 8601")
   1072                     .freeze();
   1073 
   1074                 public String transform(String source) {
   1075                     String result = fixNames.get(source);
   1076                     return result != null ? result : UCharacter.toTitleCase(source, null);
   1077                 }
   1078             });
   1079 
   1080             functionMap.put("calField", new Transform<String, String>() {
   1081                 public String transform(String source) {
   1082                     String[] fields = source.split(":", 3);
   1083                     order = 0;
   1084                     final List<String> widthValues = Arrays.asList(
   1085                         "wide", "abbreviated", "short", "narrow");
   1086                     final List<String> calendarFieldValues = Arrays.asList(
   1087                         "Eras",
   1088                         "Quarters",
   1089                         "Months",
   1090                         "Days",
   1091                         "DayPeriods",
   1092                         "Formats");
   1093                     final List<String> calendarFormatTypes = Arrays.asList(
   1094                         "Standard",
   1095                         "Flexible",
   1096                         "Intervals");
   1097                     final List<String> calendarContextTypes = Arrays.asList(
   1098                         "none",
   1099                         "format",
   1100                         "stand-alone");
   1101                     final List<String> calendarFormatSubtypes = Arrays.asList(
   1102                         "date",
   1103                         "time",
   1104                         "time12",
   1105                         "time24",
   1106                         "dateTime",
   1107                         "fallback");
   1108 
   1109                     Map<String, String> fixNames = Builder.with(new HashMap<String, String>())
   1110                         .put("DayPeriods", "Day Periods")
   1111                         .put("format", "Formatting")
   1112                         .put("stand-alone", "Standalone")
   1113                         .put("none", "")
   1114                         .put("date", "Date Formats")
   1115                         .put("time", "Time Formats")
   1116                         .put("time12", "12 Hour Time Formats")
   1117                         .put("time24", "24 Hour Time Formats")
   1118                         .put("dateTime", "Date & Time Combination Formats")
   1119                         .freeze();
   1120 
   1121                     if (calendarFieldValues.contains(fields[0])) {
   1122                         order = calendarFieldValues.indexOf(fields[0]) * 100;
   1123                     } else {
   1124                         order = calendarFieldValues.size() * 100;
   1125                     }
   1126 
   1127                     if (fields[0].equals("Formats")) {
   1128                         if (calendarFormatTypes.contains(fields[1])) {
   1129                             order += calendarFormatTypes.indexOf(fields[1]) * 10;
   1130                         } else {
   1131                             order += calendarFormatTypes.size() * 10;
   1132                         }
   1133                         if (calendarFormatSubtypes.contains(fields[2])) {
   1134                             order += calendarFormatSubtypes.indexOf(fields[2]);
   1135                         } else {
   1136                             order += calendarFormatSubtypes.size();
   1137                         }
   1138                     } else {
   1139                         if (widthValues.contains(fields[1])) {
   1140                             order += widthValues.indexOf(fields[1]) * 10;
   1141                         } else {
   1142                             order += widthValues.size() * 10;
   1143                         }
   1144                         if (calendarContextTypes.contains(fields[2])) {
   1145                             order += calendarContextTypes.indexOf(fields[2]);
   1146                         } else {
   1147                             order += calendarContextTypes.size();
   1148                         }
   1149                     }
   1150 
   1151                     String[] fixedFields = new String[fields.length];
   1152                     for (int i = 0; i < fields.length; i++) {
   1153                         String s = fixNames.get(fields[i]);
   1154                         fixedFields[i] = s != null ? s : fields[i];
   1155                     }
   1156 
   1157                     return fixedFields[0] +
   1158                         " - " + fixedFields[1] +
   1159                         (fixedFields[2].length() > 0 ? " - " + fixedFields[2] : "");
   1160                 }
   1161             });
   1162 
   1163             functionMap.put("titlecase", new Transform<String, String>() {
   1164                 public String transform(String source) {
   1165                     return UCharacter.toTitleCase(source, null);
   1166                 }
   1167             });
   1168             functionMap.put("categoryFromScript", new Transform<String, String>() {
   1169                 public String transform(String source) {
   1170                     String script = hyphenSplitter.split(source);
   1171                     Info info = ScriptMetadata.getInfo(script);
   1172                     if (info == null) {
   1173                         info = ScriptMetadata.getInfo("Zzzz");
   1174                     }
   1175                     order = 100 - info.idUsage.ordinal();
   1176                     return info.idUsage.name;
   1177                 }
   1178             });
   1179             functionMap.put("categoryFromKey", new Transform<String, String>() {
   1180                 Map<String, String> fixNames = Builder.with(new HashMap<String, String>())
   1181                     .put("lb", "Line Break")
   1182                     .put("hc", "Hour Cycle")
   1183                     .put("ms", "Measurement System")
   1184                     .put("cf", "Currency Format")
   1185                     .freeze();
   1186 
   1187                 public String transform(String source) {
   1188                     String fixedName = fixNames.get(source);
   1189                     return fixedName != null ? fixedName : source;
   1190                 }
   1191             });
   1192             functionMap.put("languageSection", new Transform<String, String>() {
   1193                 char[] languageRangeStartPoints = { 'A', 'E', 'K', 'O', 'T' };
   1194                 char[] languageRangeEndPoints = { 'D', 'J', 'N', 'S', 'Z' };
   1195 
   1196                 public String transform(String source0) {
   1197                     char firstLetter = getEnglishFirstLetter(source0).charAt(0);
   1198                     for (int i = 0; i < languageRangeStartPoints.length; i++) {
   1199                         if (firstLetter >= languageRangeStartPoints[i] && firstLetter <= languageRangeEndPoints[i]) {
   1200                             return "Languages (" + Character.toUpperCase(languageRangeStartPoints[i]) + "-" + Character.toUpperCase(languageRangeEndPoints[i])
   1201                             + ")";
   1202                         }
   1203                     }
   1204                     return "Languages";
   1205                 }
   1206             });
   1207             functionMap.put("firstLetter", new Transform<String, String>() {
   1208                 public String transform(String source0) {
   1209                     return getEnglishFirstLetter(source0);
   1210                 }
   1211             });
   1212             functionMap.put("languageSort", new Transform<String, String>() {
   1213                 public String transform(String source0) {
   1214                     String languageOnlyPart;
   1215                     int underscorePos = source0.indexOf("_");
   1216                     if (underscorePos > 0) {
   1217                         languageOnlyPart = source0.substring(0, underscorePos);
   1218                     } else {
   1219                         languageOnlyPart = source0;
   1220                     }
   1221 
   1222                     return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart) + " \u25BA " + source0;
   1223                 }
   1224             });
   1225             functionMap.put("scriptFromLanguage", new Transform<String, String>() {
   1226                 public String transform(String source0) {
   1227                     String language = hyphenSplitter.split(source0);
   1228                     String script = likelySubtags.getLikelyScript(language);
   1229                     if (script == null) {
   1230                         script = likelySubtags.getLikelyScript(language);
   1231                     }
   1232                     String scriptName = englishFile.getName(CLDRFile.SCRIPT_NAME, script);
   1233                     return "Languages in " + (script.equals("Hans") || script.equals("Hant") ? "Han Script"
   1234                         : scriptName.endsWith(" Script") ? scriptName
   1235                             : scriptName + " Script");
   1236                 }
   1237             });
   1238             functionMap.put("categoryFromTerritory",
   1239                 catFromTerritory = new Transform<String, String>() {
   1240                 public String transform(String source) {
   1241                     String territory = getSubdivisionsTerritory(source, null);
   1242                     String container = Containment.getContainer(territory);
   1243                     order = Containment.getOrder(territory);
   1244                     return englishFile.getName(CLDRFile.TERRITORY_NAME, container);
   1245                 }
   1246             });
   1247             functionMap.put("territorySection", new Transform<String, String>() {
   1248                 final Set<String> specialRegions = new HashSet<String>(Arrays.asList("EZ", "EU", "QO", "UN", "ZZ"));
   1249 
   1250                 public String transform(String source0) {
   1251                     // support subdivisions
   1252                     String theTerritory = getSubdivisionsTerritory(source0, null);
   1253                     try {
   1254                         if (specialRegions.contains(theTerritory)
   1255                             || theTerritory.charAt(0) < 'A' && Integer.valueOf(theTerritory) > 0) {
   1256                             return "Geographic Regions";
   1257                         }
   1258                     } catch (NumberFormatException ex) {
   1259                     }
   1260                     String theContinent = Containment.getContinent(theTerritory);
   1261                     String theSubContinent;
   1262                     switch (theContinent) { // was Integer.valueOf
   1263                     case "019": // Americas - For the territorySection, we just group North America & South America
   1264                         final String subcontinent = Containment.getSubcontinent(theTerritory);
   1265                         theSubContinent = subcontinent.equals("005") ? "005" : "003"; // was Integer.valueOf(subcontinent) == 5
   1266                         return "Territories (" + englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent) + ")";
   1267                     case "001":
   1268                     case "ZZ":
   1269                         return "Geographic Regions"; // not in containment
   1270                     default:
   1271                         return "Territories (" + englishFile.getName(CLDRFile.TERRITORY_NAME, theContinent) + ")";
   1272                     }
   1273                 }
   1274             });
   1275             functionMap.put("categoryFromTimezone",
   1276                 catFromTimezone = new Transform<String, String>() {
   1277                 public String transform(String source0) {
   1278                     String territory = Containment.getRegionFromZone(source0);
   1279                     if (territory == null) {
   1280                         territory = "ZZ";
   1281                     }
   1282                     return catFromTerritory.transform(territory);
   1283                 }
   1284             });
   1285             functionMap.put("timeZonePage", new Transform<String, String>() {
   1286                 Set<String> singlePageTerritories = new HashSet<String>(Arrays.asList("AQ", "RU", "ZZ"));
   1287 
   1288                 public String transform(String source0) {
   1289                     String theTerritory = Containment.getRegionFromZone(source0);
   1290                     if (theTerritory == null || theTerritory == "001") {
   1291                         theTerritory = "ZZ";
   1292                     }
   1293                     if (singlePageTerritories.contains(theTerritory)) {
   1294                         return englishFile.getName(CLDRFile.TERRITORY_NAME, theTerritory);
   1295                     }
   1296                     String theContinent = Containment.getContinent(theTerritory);
   1297                     final String subcontinent = Containment.getSubcontinent(theTerritory);
   1298                     String theSubContinent;
   1299                     switch (Integer.valueOf(theContinent)) {
   1300                     case 9: // Oceania - For the timeZonePage, we group Australasia on one page, and the rest of Oceania on the other.
   1301                         try {
   1302                             theSubContinent = subcontinent.equals("053") ? "053" : "009"; // was Integer.valueOf(subcontinent) == 53
   1303                         } catch (NumberFormatException ex) {
   1304                             theSubContinent = "009";
   1305                         }
   1306                         return englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent);
   1307                     case 19: // Americas - For the timeZonePage, we just group North America & South America
   1308                         theSubContinent = Integer.valueOf(subcontinent) == 5 ? "005" : "003";
   1309                         return englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent);
   1310                     case 142: // Asia
   1311                         return englishFile.getName(CLDRFile.TERRITORY_NAME, subcontinent);
   1312                     default:
   1313                         return englishFile.getName(CLDRFile.TERRITORY_NAME, theContinent);
   1314                     }
   1315                 }
   1316             });
   1317 
   1318             functionMap.put("timezoneSorting", new Transform<String, String>() {
   1319                 public String transform(String source) {
   1320                     final List<String> codeValues = Arrays.asList(
   1321                         "generic-long",
   1322                         "generic-short",
   1323                         "standard-long",
   1324                         "standard-short",
   1325                         "daylight-long",
   1326                         "daylight-short");
   1327                     if (codeValues.contains(source)) {
   1328                         order = codeValues.indexOf(source);
   1329                     } else {
   1330                         order = codeValues.size();
   1331                     }
   1332                     return source;
   1333                 }
   1334             });
   1335 
   1336             functionMap.put("tzdpField", new Transform<String, String>() {
   1337                 public String transform(String source) {
   1338                     Map<String, String> fieldNames = Builder.with(new HashMap<String, String>())
   1339                         .put("regionFormat", "Region Format - Generic")
   1340                         .put("regionFormat-standard", "Region Format - Standard")
   1341                         .put("regionFormat-daylight", "Region Format - Daylight")
   1342                         .put("gmtFormat", "GMT Format")
   1343                         .put("hourFormat", "GMT Hours/Minutes Format")
   1344                         .put("gmtZeroFormat", "GMT Zero Format")
   1345                         .put("fallbackFormat", "Location Fallback Format")
   1346                         .freeze();
   1347                     final List<String> fieldOrder = Arrays.asList(
   1348                         "regionFormat",
   1349                         "regionFormat-standard",
   1350                         "regionFormat-daylight",
   1351                         "gmtFormat",
   1352                         "hourFormat",
   1353                         "gmtZeroFormat",
   1354                         "fallbackFormat");
   1355 
   1356                     if (fieldOrder.contains(source)) {
   1357                         order = fieldOrder.indexOf(source);
   1358                     } else {
   1359                         order = fieldOrder.size();
   1360                     }
   1361 
   1362                     String result = fieldNames.get(source);
   1363                     return result == null ? source : result;
   1364                 }
   1365             });
   1366             functionMap.put("unit", new Transform<String, String>() {
   1367                 public String transform(String source) {
   1368                     int m = unitOrder.indexOf(source);
   1369                     order = m;
   1370                     return source.substring(source.indexOf('-') + 1);
   1371                 }
   1372             });
   1373 
   1374             functionMap.put("numericSort", new Transform<String, String>() {
   1375                 // Probably only works well for small values, like -5 through +4.
   1376                 public String transform(String source) {
   1377                     Integer pos = Integer.valueOf(source) + 5;
   1378                     suborder = new SubstringOrder(pos.toString());
   1379                     return source;
   1380                 }
   1381             });
   1382 
   1383             functionMap.put("metazone", new Transform<String, String>() {
   1384 
   1385                 public String transform(String source) {
   1386                     if (PathHeader.UNIFORM_CONTINENTS) {
   1387                         String container = getMetazonePageTerritory(source);
   1388                         order = Containment.getOrder(container);
   1389                         return englishFile.getName(CLDRFile.TERRITORY_NAME, container);
   1390                     } else {
   1391                         String continent = metazoneToContinent.get(source);
   1392                         if (continent == null) {
   1393                             continent = "UnknownT";
   1394                         }
   1395                         return continent;
   1396                     }
   1397                 }
   1398             });
   1399 
   1400             Object[][] ctto = {
   1401                 { "BUK", "MM" },
   1402                 { "CSD", "RS" },
   1403                 { "CSK", "CZ" },
   1404                 { "DDM", "DE" },
   1405                 { "EUR", "ZZ" },
   1406                 { "RHD", "ZW" },
   1407                 { "SUR", "RU" },
   1408                 { "TPE", "TL" },
   1409                 { "XAG", "ZZ" },
   1410                 { "XAU", "ZZ" },
   1411                 { "XBA", "ZZ" },
   1412                 { "XBB", "ZZ" },
   1413                 { "XBC", "ZZ" },
   1414                 { "XBD", "ZZ" },
   1415                 { "XDR", "ZZ" },
   1416                 { "XEU", "ZZ" },
   1417                 { "XFO", "ZZ" },
   1418                 { "XFU", "ZZ" },
   1419                 { "XPD", "ZZ" },
   1420                 { "XPT", "ZZ" },
   1421                 { "XRE", "ZZ" },
   1422                 { "XSU", "ZZ" },
   1423                 { "XTS", "ZZ" },
   1424                 { "XUA", "ZZ" },
   1425                 { "XXX", "ZZ" },
   1426                 { "YDD", "YE" },
   1427                 { "YUD", "RS" },
   1428                 { "YUM", "RS" },
   1429                 { "YUN", "RS" },
   1430                 { "YUR", "RS" },
   1431                 { "ZRN", "CD" },
   1432                 { "ZRZ", "CD" },
   1433             };
   1434 
   1435             Object[][] sctc = {
   1436                 { "Northern America", "North America (C)" },
   1437                 { "Central America", "North America (C)" },
   1438                 { "Caribbean", "North America (C)" },
   1439                 { "South America", "South America (C)" },
   1440                 { "Northern Africa", "Northern Africa" },
   1441                 { "Western Africa", "Western Africa" },
   1442                 { "Middle Africa", "Middle Africa" },
   1443                 { "Eastern Africa", "Eastern Africa" },
   1444                 { "Southern Africa", "Southern Africa" },
   1445                 { "Europe", "Northern/Western Europe" },
   1446                 { "Northern Europe", "Northern/Western Europe" },
   1447                 { "Western Europe", "Northern/Western Europe" },
   1448                 { "Eastern Europe", "Southern/Eastern Europe" },
   1449                 { "Southern Europe", "Southern/Eastern Europe" },
   1450                 { "Western Asia", "Western Asia (C)" },
   1451                 { "Central Asia", "Central Asia (C)" },
   1452                 { "Eastern Asia", "Eastern Asia (C)" },
   1453                 { "Southern Asia", "Southern Asia (C)" },
   1454                 { "Southeast Asia", "Southeast Asia (C)" },
   1455                 { "Australasia", "Oceania (C)" },
   1456                 { "Melanesia", "Oceania (C)" },
   1457                 { "Micronesian Region", "Oceania (C)" }, // HACK
   1458                 { "Polynesia", "Oceania (C)" },
   1459                 { "Unknown Region", "Unknown Region (C)" },
   1460             };
   1461 
   1462             final Map<String, String> currencyToTerritoryOverrides = CldrUtility.asMap(ctto);
   1463             final Map<String, String> subContinentToContinent = CldrUtility.asMap(sctc);
   1464             final Set<String> fundCurrencies = new HashSet<String>(Arrays.asList("CHE", "CHW", "CLF", "COU", "ECV", "MXV", "USN", "USS", "UYI", "XEU", "ZAL"));
   1465             final Set<String> offshoreCurrencies = new HashSet<String>(Arrays.asList("CNH"));
   1466             // TODO: Put this into supplementalDataInfo ?
   1467 
   1468             functionMap.put("categoryFromCurrency", new Transform<String, String>() {
   1469                 public String transform(String source0) {
   1470                     String tenderOrNot = "";
   1471                     String territory = likelySubtags.getLikelyTerritoryFromCurrency(source0);
   1472                     if (territory == null) {
   1473                         String tag;
   1474                         if (fundCurrencies.contains(source0)) {
   1475                             tag = " (fund)";
   1476                         } else if (offshoreCurrencies.contains(source0)) {
   1477                             tag = " (offshore)";
   1478                         } else {
   1479                             tag = " (old)";
   1480                         }
   1481                         tenderOrNot = ": " + source0 + tag;
   1482                     }
   1483                     if (currencyToTerritoryOverrides.keySet().contains(source0)) {
   1484                         territory = currencyToTerritoryOverrides.get(source0);
   1485                     } else if (territory == null) {
   1486                         territory = source0.substring(0, 2);
   1487                     }
   1488 
   1489                     if (territory.equals("ZZ")) {
   1490                         order = 999;
   1491                         return englishFile.getName(CLDRFile.TERRITORY_NAME, territory) + ": " + source0;
   1492                     } else {
   1493                         return catFromTerritory.transform(territory) + ": "
   1494                             + englishFile.getName(CLDRFile.TERRITORY_NAME, territory)
   1495                             + tenderOrNot;
   1496                     }
   1497                 }
   1498             });
   1499             functionMap.put("continentFromCurrency", new Transform<String, String>() {
   1500                 public String transform(String source0) {
   1501                     String subContinent;
   1502                     String territory = likelySubtags.getLikelyTerritoryFromCurrency(source0);
   1503                     if (currencyToTerritoryOverrides.keySet().contains(source0)) {
   1504                         territory = currencyToTerritoryOverrides.get(source0);
   1505                     } else if (territory == null) {
   1506                         territory = source0.substring(0, 2);
   1507                     }
   1508 
   1509                     if (territory.equals("ZZ")) {
   1510                         order = 999;
   1511                         subContinent = englishFile.getName(CLDRFile.TERRITORY_NAME, territory);
   1512                     } else {
   1513                         subContinent = catFromTerritory.transform(territory);
   1514                     }
   1515 
   1516                     String result = subContinentToContinent.get(subContinent); //the continent is the last word in the territory representation
   1517                     return result;
   1518                 }
   1519             });
   1520             functionMap.put("numberingSystem", new Transform<String, String>() {
   1521                 public String transform(String source0) {
   1522                     if ("latn".equals(source0)) {
   1523                         return "";
   1524                     }
   1525                     String displayName = englishFile.getStringValue("//ldml/localeDisplayNames/types/type[@key=\"numbers\"][@type=\""
   1526                         + source0 + "\"]");
   1527                     return "using " + (displayName == null ? source0 : displayName + " (" + source0 + ")");
   1528                 }
   1529             });
   1530 
   1531             functionMap.put("datefield", new Transform<String, String>() {
   1532                 private final String[] datefield = {
   1533                     "era", "era-short", "era-narrow",
   1534                     "century", "century-short", "century-narrow",
   1535                     "year", "year-short", "year-narrow",
   1536                     "quarter", "quarter-short", "quarter-narrow",
   1537                     "month", "month-short", "month-narrow",
   1538                     "week", "week-short", "week-narrow",
   1539                     "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
   1540                     "day", "day-short", "day-narrow",
   1541                     "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
   1542                     "weekday", "weekday-short", "weekday-narrow",
   1543                     "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
   1544                     "dayperiod", "dayperiod-short", "dayperiod-narrow",
   1545                     "zone", "zone-short", "zone-narrow",
   1546                     "hour", "hour-short", "hour-narrow",
   1547                     "minute", "minute-short", "minute-narrow",
   1548                     "second", "second-short", "second-narrow",
   1549                     "millisecond", "millisecond-short", "millisecond-narrow",
   1550                     "microsecond", "microsecond-short", "microsecond-narrow",
   1551                     "nanosecond", "nanosecond-short", "nanosecond-narrow",
   1552 
   1553                 };
   1554 
   1555                 public String transform(String source) {
   1556                     order = getIndex(source, datefield);
   1557                     return source;
   1558                 }
   1559             });
   1560             // //ldml/dates/fields/field[@type="%A"]/relative[@type="%A"]
   1561             functionMap.put("relativeDate", new Transform<String, String>() {
   1562                 private final String[] relativeDateField = {
   1563                     "year", "year-short", "year-narrow",
   1564                     "quarter", "quarter-short", "quarter-narrow",
   1565                     "month", "month-short", "month-narrow",
   1566                     "week", "week-short", "week-narrow",
   1567                     "day", "day-short", "day-narrow",
   1568                     "hour", "hour-short", "hour-narrow",
   1569                     "minute", "minute-short", "minute-narrow",
   1570                     "second", "second-short", "second-narrow",
   1571                     "sun", "sun-short", "sun-narrow",
   1572                     "mon", "mon-short", "mon-narrow",
   1573                     "tue", "tue-short", "tue-narrow",
   1574                     "wed", "wed-short", "wed-narrow",
   1575                     "thu", "thu-short", "thu-narrow",
   1576                     "fri", "fri-short", "fri-narrow",
   1577                     "sat", "sat-short", "sat-narrow",
   1578                 };
   1579                 private final String[] longNames = {
   1580                     "Year", "Year Short", "Year Narrow",
   1581                     "Quarter", "Quarter Short", "Quarter Narrow",
   1582                     "Month", "Month Short", "Month Narrow",
   1583                     "Week", "Week Short", "Week Narrow",
   1584                     "Day", "Day Short", "Day Narrow",
   1585                     "Hour", "Hour Short", "Hour Narrow",
   1586                     "Minute", "Minute Short", "Minute Narrow",
   1587                     "Second", "Second Short", "Second Narrow",
   1588                     "Sunday", "Sunday Short", "Sunday Narrow",
   1589                     "Monday", "Monday Short", "Monday Narrow",
   1590                     "Tuesday", "Tuesday Short", "Tuesday Narrow",
   1591                     "Wednesday", "Wednesday Short", "Wednesday Narrow",
   1592                     "Thursday", "Thursday Short", "Thursday Narrow",
   1593                     "Friday", "Friday Short", "Friday Narrow",
   1594                     "Saturday", "Saturday Short", "Saturday Narrow",
   1595                 };
   1596 
   1597                 public String transform(String source) {
   1598                     order = getIndex(source, relativeDateField) + 100;
   1599                     return "Relative " + longNames[getIndex(source, relativeDateField)];
   1600                 }
   1601             });
   1602             // Sorts numberSystem items (except for decimal formats).
   1603             functionMap.put("number", new Transform<String, String>() {
   1604                 private final String[] symbols = { "decimal", "group",
   1605                     "plusSign", "minusSign", "percentSign", "perMille",
   1606                     "exponential", "superscriptingExponent",
   1607                     "infinity", "nan", "list", "currencies"
   1608                 };
   1609 
   1610                 public String transform(String source) {
   1611                     String[] parts = source.split("-");
   1612                     order = getIndex(parts[0], symbols);
   1613                     // e.g. "currencies-one"
   1614                     if (parts.length > 1) {
   1615                         suborder = new SubstringOrder(parts[1]);
   1616                     }
   1617                     return source;
   1618                 }
   1619             });
   1620             functionMap.put("numberFormat", new Transform<String, String>() {
   1621                 public String transform(String source) {
   1622                     final List<String> fieldOrder = Arrays.asList(
   1623                         "standard-decimal",
   1624                         "standard-currency",
   1625                         "standard-currency-accounting",
   1626                         "standard-percent",
   1627                         "standard-scientific");
   1628 
   1629                     if (fieldOrder.contains(source)) {
   1630                         order = fieldOrder.indexOf(source);
   1631                     } else {
   1632                         order = fieldOrder.size();
   1633                     }
   1634 
   1635                     return source;
   1636                 }
   1637             });
   1638 
   1639             functionMap.put("localePattern", new Transform<String, String>() {
   1640                 public String transform(String source) {
   1641                     // Put localeKeyTypePattern behind localePattern and
   1642                     // localeSeparator.
   1643                     if (source.equals("localeKeyTypePattern")) {
   1644                         order = 10;
   1645                     }
   1646                     return source;
   1647                 }
   1648             });
   1649             functionMap.put("listOrder", new Transform<String, String>() {
   1650                 private String[] listParts = { "2", "start", "middle", "end" };
   1651 
   1652                 @Override
   1653                 public String transform(String source) {
   1654                     order = getIndex(source, listParts);
   1655                     return source;
   1656                 }
   1657             });
   1658             functionMap.put("alphaOrder", new Transform<String, String>() {
   1659                 @Override
   1660                 public String transform(String source) {
   1661                     order = 0;
   1662                     return source;
   1663                 }
   1664             });
   1665             functionMap.put("transform", new Transform<String, String>() {
   1666                 Splitter commas = Splitter.on(',').trimResults();
   1667 
   1668                 @Override
   1669                 public String transform(String source) {
   1670                     List<String> parts = commas.splitToList(source);
   1671                     return parts.get(1)
   1672                         + (parts.get(0).equals("both") ? "" : "")
   1673                         + parts.get(2)
   1674                         + (parts.size() > 3 ? "/" + parts.get(3) : "");
   1675                 }
   1676             });
   1677             functionMap.put("major", new Transform<String, String>() {
   1678                 @Override
   1679                 public String transform(String source) {
   1680                     String major = Emoji.getMajorCategory(source);
   1681                     // check that result is reasonable by running through PageId.
   1682                     switch(major) {
   1683                     default:
   1684                         PageId pageId2 = PageId.forString(major);
   1685                         if (pageId2.getSectionId() != SectionId.Characters) {
   1686                             if (pageId2 == PageId.Symbols) {
   1687                                 pageId2 = PageId.Symbols2;
   1688                             }
   1689                         }
   1690                         return pageId2.toString();
   1691                     case "Smileys & People":
   1692                         String minorCat = Emoji.getMinorCategory(source);
   1693                         if (minorCat.equals("skin-tone") || minorCat.equals("hair-style")) {
   1694                             return PageId.Component.toString();
   1695                         } else if (!minorCat.contains("face")) {
   1696                             return PageId.People.toString();
   1697                         } else {
   1698                             return PageId.Smileys.toString();
   1699                         }
   1700                     }
   1701                 }
   1702             });
   1703             functionMap.put("minor", new Transform<String, String>() {
   1704                 @Override
   1705                 public String transform(String source) {
   1706                     String minorCat = Emoji.getMinorCategory(source);
   1707                     order = Emoji.getMinorToOrder(minorCat);
   1708                     return minorCat;
   1709                 }
   1710             });
   1711 
   1712         }
   1713 
   1714         private static int getIndex(String item, String[] array) {
   1715             for (int i = 0; i < array.length; i++) {
   1716                 if (item.equals(array[i])) {
   1717                     return i;
   1718                 }
   1719             }
   1720             return -1;
   1721         }
   1722 
   1723         private static String getEnglishFirstLetter(String s) {
   1724             String languageOnlyPart;
   1725             int underscorePos = s.indexOf("_");
   1726             if (underscorePos > 0) {
   1727                 languageOnlyPart = s.substring(0, underscorePos);
   1728             } else {
   1729                 languageOnlyPart = s;
   1730             }
   1731             return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart).substring(0, 1).toUpperCase();
   1732         }
   1733 
   1734         static class HyphenSplitter {
   1735             String main;
   1736             String extras;
   1737 
   1738             String split(String source) {
   1739                 int hyphenPos = source.indexOf('-');
   1740                 if (hyphenPos < 0) {
   1741                     main = source;
   1742                     extras = "";
   1743                 } else {
   1744                     main = source.substring(0, hyphenPos);
   1745                     extras = source.substring(hyphenPos);
   1746                 }
   1747                 return main;
   1748             }
   1749         }
   1750 
   1751         /**
   1752          * This converts "functions", like &month, and sets the order.
   1753          *
   1754          * @param input
   1755          * @param order
   1756          * @return
   1757          */
   1758         private static String fix(String input, int orderIn) {
   1759             if (input.contains("")) {
   1760                 int debug = 0;
   1761             }
   1762             String oldInput = input;
   1763             input = RegexLookup.replace(input, args.value);
   1764             order = orderIn;
   1765             suborder = null;
   1766             int pos = 0;
   1767             while (true) {
   1768                 int functionStart = input.indexOf('&', pos);
   1769                 if (functionStart < 0) {
   1770                     return input;
   1771                 }
   1772                 int functionEnd = input.indexOf('(', functionStart);
   1773                 int argEnd = input.indexOf(')', functionEnd);
   1774                 Transform<String, String> func = functionMap.get(input.substring(functionStart + 1,
   1775                     functionEnd));
   1776                 final String arg = input.substring(functionEnd + 1, argEnd);
   1777                 String temp = func.transform(arg);
   1778                 if (temp == null) {
   1779                     func.transform(arg);
   1780                     throw new IllegalArgumentException("Function returns invalid results for " + arg + ".");
   1781                 }
   1782                 input = input.substring(0, functionStart) + temp + input.substring(argEnd + 1);
   1783                 pos = functionStart + temp.length();
   1784             }
   1785         }
   1786 
   1787         /**
   1788          * Collect all the paths for a CLDRFile, and make sure that they have
   1789          * cached PathHeaders
   1790          *
   1791          * @param file
   1792          * @return immutable set of paths in the file
   1793          */
   1794         public Set<String> pathsForFile(CLDRFile file) {
   1795             // make sure we cache all the path headers
   1796             Set<String> filePaths = CollectionUtilities.addAll(file.fullIterable().iterator(), new HashSet<String>());
   1797             for (String path : filePaths) {
   1798                 try {
   1799                     fromPath(path); // call to make sure cached
   1800                 } catch (Throwable t) {
   1801                     // ... some other exception
   1802                 }
   1803             }
   1804             return Collections.unmodifiableSet(filePaths);
   1805         }
   1806 
   1807         /**
   1808          * Returns those regexes that were never matched.
   1809          * @return
   1810          */
   1811         public Set<String> getUnmatchedRegexes() {
   1812             Map<String, RawData> outputUnmatched = new LinkedHashMap<String, RawData>();
   1813             lookup.getUnmatchedPatterns(matchersFound, outputUnmatched);
   1814             return outputUnmatched.keySet();
   1815         }
   1816 
   1817         public String getRegexInfo() {
   1818             return lookup.toString();
   1819         }
   1820     }
   1821 
   1822     /**
   1823      * Return the territory used for the title of the Metazone page in the
   1824      * Survey Tool.
   1825      *
   1826      * @param source
   1827      * @return
   1828      */
   1829     public static String getMetazonePageTerritory(String source) {
   1830         String result = metazoneToPageTerritory.get(source);
   1831         return result == null ? "ZZ" : result;
   1832     }
   1833 
   1834     private static final List<String> COUNTS = Arrays.asList("displayName", "zero", "one", "two", "few", "many", "other", "per");
   1835 
   1836     private static int alphabeticCompare(String aa, String bb) {
   1837         // A frozen Collator is thread-safe.
   1838         return alphabetic.compare(aa, bb);
   1839     }
   1840 
   1841     public enum BaseUrl {
   1842         //http://st.unicode.org/smoketest/survey?_=af&strid=55053dffac611328
   1843         //http://st.unicode.org/cldr-apps/survey?_=en&strid=3cd31261bf6738e1
   1844         SMOKE("http://st.unicode.org/smoketest/survey"), PRODUCTION("http://st.unicode.org/cldr-apps/survey");
   1845         final String base;
   1846 
   1847         private BaseUrl(String url) {
   1848             base = url;
   1849         }
   1850     }
   1851 
   1852     /**
   1853      * @deprecated, use CLDRConfig.urls().forPathHeader() instead.
   1854      * @param baseUrl
   1855      * @param locale
   1856      * @return
   1857      */
   1858     public String getUrl(BaseUrl baseUrl, String locale) {
   1859         return getUrl(baseUrl.base, locale);
   1860     }
   1861 
   1862     /**
   1863      * @deprecated, use CLDRConfig.urls().forPathHeader() instead.
   1864      * @param baseUrl
   1865      * @param locale
   1866      * @return
   1867      */
   1868     public String getUrl(String baseUrl, String locale) {
   1869         return getUrl(baseUrl, locale, getOriginalPath());
   1870     }
   1871 
   1872     /**
   1873      * Map http://st.unicode.org/smoketest/survey  to http://st.unicode.org/smoketest etc
   1874      * @param str
   1875      * @return
   1876      */
   1877     public static String trimLast(String str) {
   1878         int n = str.lastIndexOf('/');
   1879         if (n == -1) return "";
   1880         return str.substring(0, n + 1);
   1881     }
   1882 
   1883     /**
   1884      * @deprecated use CLDRConfig.urls()
   1885      * @param baseUrl
   1886      * @param locale
   1887      * @param path
   1888      * @return
   1889      */
   1890     public static String getUrl(String baseUrl, String locale, String path) {
   1891         return trimLast(baseUrl) + "v#/" + locale + "//" + StringId.getHexId(path);
   1892     }
   1893 
   1894     // eg http://st.unicode.org/cldr-apps/survey?_=fr&x=Locale%20Name%20Patterns
   1895     /**
   1896      * @deprecated use CLDRConfig.urls()
   1897      * @param baseUrl
   1898      * @param locale
   1899      * @param subsection
   1900      * @return
   1901      */
   1902     public static String getPageUrl(String baseUrl, String locale, PageId subsection) {
   1903         return trimLast(baseUrl) + "v#/" + locale + "/" + subsection + "/";
   1904     }
   1905 
   1906     /**
   1907      * @deprecated use CLDRConfig.urls()
   1908      * @param baseUrl
   1909      * @param file
   1910      * @param path
   1911      * @return
   1912      */
   1913     public static String getLinkedView(String baseUrl, CLDRFile file, String path) {
   1914         String value = file.getStringValue(path);
   1915         if (value == null) {
   1916             return null;
   1917         }
   1918         return SECTION_LINK + PathHeader.getUrl(baseUrl, file.getLocaleID(), path) + "'><em>view</em></a>";
   1919     }
   1920 
   1921     /**
   1922      * If a subdivision, return the (uppercased) territory and if suffix != null, the suffix. Otherwise return the input as is.
   1923      * @param input
   1924      * @param suffix
   1925      * @return
   1926      */
   1927     private static String getSubdivisionsTerritory(String input, Output<String> suffix) {
   1928         String theTerritory;
   1929         if (StandardCodes.LstrType.subdivision.isWellFormed(input)) {
   1930             int territoryEnd = input.charAt(0) < 'A' ? 3 : 2;
   1931             theTerritory = input.substring(0, territoryEnd).toUpperCase(Locale.ROOT);
   1932             if (suffix != null) {
   1933                 suffix.value = input.substring(territoryEnd);
   1934             }
   1935         } else {
   1936             theTerritory = input;
   1937             if (suffix != null) {
   1938                 suffix.value = "";
   1939             }
   1940         }
   1941         return theTerritory;
   1942     }
   1943 }
   1944