Home | History | Annotate | Download | only in util
      1 /*
      2  **********************************************************************
      3  * Copyright (c) 2002-2013, International Business Machines
      4  * Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  * Author: Mark Davis
      7  **********************************************************************
      8  */
      9 package org.unicode.cldr.util;
     10 
     11 import java.io.BufferedReader;
     12 import java.io.File;
     13 import java.io.FileReader;
     14 import java.io.IOException;
     15 import java.io.InputStream;
     16 import java.io.PrintWriter;
     17 import java.lang.reflect.Constructor;
     18 import java.lang.reflect.Method;
     19 import java.nio.charset.Charset;
     20 import java.util.ArrayList;
     21 import java.util.Arrays;
     22 import java.util.Calendar;
     23 import java.util.Collection;
     24 import java.util.Collections;
     25 import java.util.Comparator;
     26 import java.util.Date;
     27 import java.util.EnumSet;
     28 import java.util.HashMap;
     29 import java.util.HashSet;
     30 import java.util.Iterator;
     31 import java.util.LinkedHashMap;
     32 import java.util.LinkedHashSet;
     33 import java.util.List;
     34 import java.util.Map;
     35 import java.util.Map.Entry;
     36 import java.util.Objects;
     37 import java.util.Set;
     38 import java.util.SortedMap;
     39 import java.util.SortedSet;
     40 import java.util.TreeMap;
     41 import java.util.TreeSet;
     42 import java.util.concurrent.ConcurrentHashMap;
     43 import java.util.regex.Matcher;
     44 import java.util.regex.Pattern;
     45 
     46 import org.unicode.cldr.draft.FileUtilities;
     47 import org.unicode.cldr.util.RegexLookup.Finder;
     48 
     49 import com.google.common.base.Splitter;
     50 import com.ibm.icu.dev.test.TestFmwk;
     51 import com.ibm.icu.impl.Utility;
     52 import com.ibm.icu.text.DateFormat;
     53 import com.ibm.icu.text.SimpleDateFormat;
     54 import com.ibm.icu.text.Transform;
     55 import com.ibm.icu.text.Transliterator;
     56 import com.ibm.icu.text.UTF16;
     57 import com.ibm.icu.text.UnicodeSet;
     58 import com.ibm.icu.text.UnicodeSetIterator;
     59 import com.ibm.icu.util.Freezable;
     60 import com.ibm.icu.util.Output;
     61 import com.ibm.icu.util.TimeZone;
     62 
     63 public class CldrUtility {
     64 
    /** The UTF-8 charset, looked up once by name. */
    public static final Charset UTF8 = Charset.forName("utf-8");
    public static final boolean BETA = false;

    /** Always a single line feed; this is not the platform line separator. */
    public static final String LINE_SEPARATOR = "\n";
    /** Matches a semicolon together with any whitespace on either side of it. */
    public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*");

    private static final boolean HANDLEFILE_SHOW_SKIP = false;
    // Constant for "∅∅∅" (three U+2205 EMPTY SET characters). Indicates that a child locale has no value for a
    // path even though a parent does.
    public static final String NO_INHERITANCE_MARKER = new String(new char[] { 0x2205, 0x2205, 0x2205 });

    /**
     * Define the constant INHERITANCE_MARKER for "↑↑↑" (three U+2191 UPWARDS ARROW characters), used by Survey Tool
     * to indicate a "passthru" vote to the parent locale.
     * If CLDRFile ever finds this value in a data field, writing of the field should be suppressed.
     */
    public static final String INHERITANCE_MARKER = new String(new char[] { 0x2191, 0x2191, 0x2191 });

    /** The ASCII digits 0 through 9, frozen so the set cannot be modified. */
    public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
     83 
     84     /**
     85      * Very simple class, used to replace variables in a string. For example
     86      * <p>
     87      *
     88      * <pre>
     89      * static VariableReplacer langTag = new VariableReplacer()
     90      * 			.add("$alpha", "[a-zA-Z]")
     91      * 			.add("$digit", "[0-9]")
     92      * 			.add("$alphanum", "[a-zA-Z0-9]")
     93      * 			.add("$x", "[xX]");
     94      * 			...
     95      * 			String langTagPattern = langTag.replace(...);
     96      * </pre>
     97      */
     98     public static class VariableReplacer {
     99         // simple implementation for now
    100         private Map<String, String> m = new TreeMap<String, String>(Collections.reverseOrder());
    101 
    102         public VariableReplacer add(String variable, String value) {
    103             m.put(variable, value);
    104             return this;
    105         }
    106 
    107         public String replace(String source) {
    108             String oldSource;
    109             do {
    110                 oldSource = source;
    111                 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
    112                     String variable = it.next();
    113                     String value = m.get(variable);
    114                     source = replaceAll(source, variable, value);
    115                 }
    116             } while (!source.equals(oldSource));
    117             return source;
    118         }
    119 
    120         public String replaceAll(String source, String key, String value) {
    121             while (true) {
    122                 int pos = source.indexOf(key);
    123                 if (pos < 0) return source;
    124                 source = source.substring(0, pos) + value + source.substring(pos + key.length());
    125             }
    126         }
    127     }
    128 
    /**
     * Callback that is handed one line of text at a time.
     */
    public interface LineHandler {
        /**
         * Processes a single line.
         *
         * @param line
         *            the line to process
         * @return false if the line was skipped
         * @throws Exception
         *             if the implementation cannot process the line
         */
        boolean handle(String line) throws Exception;
    }
    138 
    139     public static String getPath(String path, String filename) {
    140         if (path == null) {
    141             return null;
    142         }
    143         final File file = filename == null ? new File(path)
    144             : new File(path, filename);
    145         try {
    146             return file.getCanonicalPath() + File.separatorChar;
    147         } catch (IOException e) {
    148             return file.getPath() + File.separatorChar;
    149         }
    150     }
    151 
    152     static String getPath(String path) {
    153         return getPath(path, null);
    154     }
    155 
    /**
     * HTML snippet: two script elements that load Google Analytics' ga.js (over https or http, matching the
     * page protocol) and record a page view for tracker id UA-7672775-1.
     */
    public static final String ANALYTICS = "<script type=\"text/javascript\">\n"
        + "var gaJsHost = ((\"https:\" == document.location.protocol) ? \"https://ssl.\" : \"http://www.\");\n"
        + "document.write(unescape(\"%3Cscript src='\" + gaJsHost + \"google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E\"));\n"
        + "</script>\n"
        + "<script type=\"text/javascript\">\n"
        + "try {\n"
        + "var pageTracker = _gat._getTracker(\"UA-7672775-1\");\n"
        + "pageTracker._trackPageview();\n"
        + "} catch(err) {}</script>";

    /** Language codes in the minimum set. The list is a fixed-size view over an array. */
    public static final List<String> MINIMUM_LANGUAGES = Arrays.asList(new String[] { "ar", "en", "de", "fr", "hi",
        "it", "es", "pt", "ru", "zh", "ja" }); // plus language itself
    /** Territory codes in the minimum set. The list is a fixed-size view over an array. */
    public static final List<String> MINIMUM_TERRITORIES = Arrays.asList(new String[] { "US", "GB", "DE", "FR", "IT",
        "JP", "CN", "IN", "RU", "BR" });
    170 
    171     public interface LineComparer {
    172         static final int LINES_DIFFERENT = -1, LINES_SAME = 0, SKIP_FIRST = 1, SKIP_SECOND = 2;
    173 
    174         /**
    175          * Returns LINES_DIFFERENT, LINES_SAME, or if one of the lines is ignorable, SKIP_FIRST or SKIP_SECOND
    176          *
    177          * @param line1
    178          * @param line2
    179          * @return
    180          */
    181         int compare(String line1, String line2);
    182     }
    183 
    184     public static class SimpleLineComparator implements LineComparer {
    185         public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8;
    186         StringIterator si1 = new StringIterator();
    187         StringIterator si2 = new StringIterator();
    188         int flags;
    189 
    190         public SimpleLineComparator(int flags) {
    191             this.flags = flags;
    192         }
    193 
    194         public int compare(String line1, String line2) {
    195             // first, see if we want to skip one or the other lines
    196             int skipper = 0;
    197             if (line1 == null) {
    198                 skipper = SKIP_FIRST;
    199             } else {
    200                 if ((flags & TRIM) != 0) line1 = line1.trim();
    201                 if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST;
    202             }
    203             if (line2 == null) {
    204                 skipper = SKIP_SECOND;
    205             } else {
    206                 if ((flags & TRIM) != 0) line2 = line2.trim();
    207                 if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND;
    208             }
    209             if (skipper != 0) {
    210                 if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both
    211                 return skipper;
    212             }
    213 
    214             // check for null
    215             if (line1 == null) {
    216                 if (line2 == null) return LINES_SAME;
    217                 return LINES_DIFFERENT;
    218             }
    219             if (line2 == null) {
    220                 return LINES_DIFFERENT;
    221             }
    222 
    223             // now check equality
    224             if (line1.equals(line2)) return LINES_SAME;
    225 
    226             // if not equal, see if we are skipping spaces
    227             if ((flags & SKIP_CVS_TAGS) != 0) {
    228                 if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) {
    229                     line1 = stripTags(line1);
    230                     line2 = stripTags(line2);
    231                     if (line1.equals(line2)) return LINES_SAME;
    232                 } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")
    233                     && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) {
    234                     return LINES_SAME;
    235                 }
    236             }
    237             if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2))) return LINES_SAME;
    238             return LINES_DIFFERENT;
    239         }
    240 
    241         // private Matcher dtdMatcher = PatternCache.get(
    242         // "\\Q<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher("");
    243 
    244         private String[] CVS_TAGS = { "Revision", "Date" };
    245 
    246         private String stripTags(String line) {
    247             // $
    248             // Revision: 8994 $
    249             // $
    250             // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $
    251             int pos = line.indexOf('$');
    252             if (pos < 0) return line;
    253             pos++;
    254             int endpos = line.indexOf('$', pos);
    255             if (endpos < 0) return line;
    256             for (int i = 0; i < CVS_TAGS.length; ++i) {
    257                 if (!line.startsWith(CVS_TAGS[i], pos)) continue;
    258                 line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos);
    259             }
    260             return line;
    261         }
    262 
    263     }
    264 
    265     /**
    266      *
    267      * @param file1
    268      * @param file2
    269      * @param failureLines
    270      *            on input, String[2], on output, failing lines
    271      * @param lineComparer
    272      * @return
    273      * @throws IOException
    274      */
    275     public static boolean areFileIdentical(String file1, String file2, String[] failureLines,
    276         LineComparer lineComparer) throws IOException {
    277         try (BufferedReader br1 = new BufferedReader(new FileReader(file1), 32 * 1024);
    278             BufferedReader br2 = new BufferedReader(new FileReader(file2), 32 * 1024);) {
    279             String line1 = "";
    280             String line2 = "";
    281             int skip = 0;
    282 
    283             while (true) {
    284                 if ((skip & LineComparer.SKIP_FIRST) == 0) line1 = br1.readLine();
    285                 if ((skip & LineComparer.SKIP_SECOND) == 0) line2 = br2.readLine();
    286                 if (line1 == null && line2 == null) return true;
    287                 if (line1 == null || line2 == null) {
    288                     // System.out.println("debug");
    289                 }
    290                 skip = lineComparer.compare(line1, line2);
    291                 if (skip == LineComparer.LINES_DIFFERENT) {
    292                     break;
    293                 }
    294             }
    295             failureLines[0] = line1 != null ? line1 : "<end of file>";
    296             failureLines[1] = line2 != null ? line2 : "<end of file>";
    297             return false;
    298         }
    299     }
    300 
    301     /*
    302      * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException {
    303      * while (true) {
    304      * String line1 = br1.readLine();
    305      * if (line1 == null) return line1;
    306      * if ((flags & TRIM)!= 0) line1 = line1.trim();
    307      * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue;
    308      * return line1;
    309      * }
    310      * }
    311      */
    312 
    313     public final static class StringIterator {
    314         String string;
    315         int position = 0;
    316 
    317         char next() {
    318             while (true) {
    319                 if (position >= string.length()) return '\uFFFF';
    320                 char ch = string.charAt(position++);
    321                 if (ch != ' ' && ch != '\t') return ch;
    322             }
    323         }
    324 
    325         StringIterator reset() {
    326             position = 0;
    327             return this;
    328         }
    329 
    330         StringIterator set(String string) {
    331             this.string = string;
    332             position = 0;
    333             return this;
    334         }
    335 
    336         boolean matches(StringIterator other) {
    337             while (true) {
    338                 char c1 = next();
    339                 char c2 = other.next();
    340                 if (c1 != c2) return false;
    341                 if (c1 == '\uFFFF') return true;
    342             }
    343         }
    344 
    345         /**
    346          * @return Returns the position.
    347          */
    348         public int getPosition() {
    349             return position;
    350         }
    351     }
    352 
    353     public static String[] splitArray(String source, char separator) {
    354         return splitArray(source, separator, false);
    355     }
    356 
    357     public static String[] splitArray(String source, char separator, boolean trim) {
    358         List<String> piecesList = splitList(source, separator, trim);
    359         String[] pieces = new String[piecesList.size()];
    360         piecesList.toArray(pieces);
    361         return pieces;
    362     }
    363 
    364     public static String[] splitCommaSeparated(String line) {
    365         // items are separated by ','
    366         // each item is of the form abc...
    367         // or "..." (required if a comma or quote is contained)
    368         // " in a field is represented by ""
    369         List<String> result = new ArrayList<String>();
    370         StringBuilder item = new StringBuilder();
    371         boolean inQuote = false;
    372         for (int i = 0; i < line.length(); ++i) {
    373             char ch = line.charAt(i); // don't worry about supplementaries
    374             switch (ch) {
    375             case '"':
    376                 inQuote = !inQuote;
    377                 // at start or end, that's enough
    378                 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote
    379                 if (inQuote && item.length() != 0) {
    380                     item.append('"');
    381                     inQuote = true;
    382                 }
    383                 break;
    384             case ',':
    385                 if (!inQuote) {
    386                     result.add(item.toString());
    387                     item.setLength(0);
    388                 } else {
    389                     item.append(ch);
    390                 }
    391                 break;
    392             default:
    393                 item.append(ch);
    394                 break;
    395             }
    396         }
    397         result.add(item.toString());
    398         return result.toArray(new String[result.size()]);
    399     }
    400 
    401     public static List<String> splitList(String source, char separator) {
    402         return splitList(source, separator, false, null);
    403     }
    404 
    405     public static List<String> splitList(String source, char separator, boolean trim) {
    406         return splitList(source, separator, trim, null);
    407     }
    408 
    409     public static List<String> splitList(String source, char separator, boolean trim, List<String> output) {
    410         return splitList(source, Character.toString(separator), trim, output);
    411     }
    412 
    413     public static List<String> splitList(String source, String separator) {
    414         return splitList(source, separator, false, null);
    415     }
    416 
    417     public static List<String> splitList(String source, String separator, boolean trim) {
    418         return splitList(source, separator, trim, null);
    419     }
    420 
    421     public static List<String> splitList(String source, String separator, boolean trim, List<String> output) {
    422         if (output == null) output = new ArrayList<String>();
    423         if (source.length() == 0) return output;
    424         int pos = 0;
    425         do {
    426             int npos = source.indexOf(separator, pos);
    427             if (npos < 0) npos = source.length();
    428             String piece = source.substring(pos, npos);
    429             if (trim) piece = piece.trim();
    430             output.add(piece);
    431             pos = npos + 1;
    432         } while (pos < source.length());
    433         return output;
    434     }
    435 
    436     /**
    437      * Protect a collection (as much as Java lets us!) from modification.
    438      * Really, really ugly code, since Java doesn't let us do better.
    439      */
    440     @SuppressWarnings({ "rawtypes", "unchecked" })
    441     public static <T> T protectCollection(T source) {
    442         // TODO - exclude UnmodifiableMap, Set, ...
    443         if (source instanceof Map) {
    444             Map sourceMap = (Map) source;
    445             Map resultMap = clone(sourceMap);
    446             if (resultMap == null) return (T) sourceMap; // failed
    447             resultMap.clear();
    448             for (Object key : sourceMap.keySet()) {
    449                 resultMap.put(protectCollection(key), protectCollection(sourceMap.get(key)));
    450             }
    451             return resultMap instanceof SortedMap ? (T) Collections.unmodifiableSortedMap((SortedMap) resultMap)
    452                 : (T) Collections.unmodifiableMap(resultMap);
    453         } else if (source instanceof Collection) {
    454             Collection sourceCollection = (Collection) source;
    455             Collection<Object> resultCollection = clone(sourceCollection);
    456             if (resultCollection == null) return (T) sourceCollection; // failed
    457             resultCollection.clear();
    458 
    459             for (Object item : sourceCollection) {
    460                 resultCollection.add(protectCollection(item));
    461             }
    462 
    463             return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection)
    464                 : sourceCollection instanceof SortedSet ? (T) Collections
    465                     .unmodifiableSortedSet((SortedSet) sourceCollection)
    466                     : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection)
    467                         : (T) Collections.unmodifiableCollection(sourceCollection);
    468         } else if (source instanceof Freezable) {
    469             Freezable freezableSource = (Freezable) source;
    470             if (freezableSource.isFrozen()) return source;
    471             return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze();
    472         } else {
    473             return source; // can't protect
    474         }
    475     }
    476 
    477     /**
    478      * Protect a collections where we don't need to clone.
    479      * @param source
    480      * @return
    481      */
    482     @SuppressWarnings({ "rawtypes", "unchecked" })
    483     public static <T> T protectCollectionX(T source) {
    484         // TODO - exclude UnmodifiableMap, Set, ...
    485         if (isImmutable(source)) {
    486             return source;
    487         }
    488         if (source instanceof Map) {
    489             Map sourceMap = (Map) source;
    490             // recurse
    491             LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents
    492             sourceMap.clear();
    493             for (Object key : tempMap.keySet()) {
    494                 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key)));
    495             }
    496             return sourceMap instanceof SortedMap ? (T) Collections.unmodifiableSortedMap((SortedMap) sourceMap)
    497                 : (T) Collections.unmodifiableMap(sourceMap);
    498         } else if (source instanceof Collection) {
    499             Collection sourceCollection = (Collection) source;
    500             LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents
    501 
    502             sourceCollection.clear();
    503             for (Object item : tempSet) {
    504                 sourceCollection.add(protectCollectionX(item));
    505             }
    506 
    507             return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection)
    508                 : sourceCollection instanceof SortedSet ? (T) Collections
    509                     .unmodifiableSortedSet((SortedSet) sourceCollection)
    510                     : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection)
    511                         : (T) Collections.unmodifiableCollection(sourceCollection);
    512         } else if (source instanceof Freezable) {
    513             Freezable freezableSource = (Freezable) source;
    514             return (T) freezableSource.freeze();
    515         } else {
    516             throw new IllegalArgumentException("Cant protect: " + source.getClass().toString());
    517         }
    518     }
    519 
    520     private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<Object>(Arrays.asList(
    521         String.class));
    522 
    523     public static boolean isImmutable(Object source) {
    524         return source == null
    525             || source instanceof Enum
    526             || source instanceof Number
    527             || KNOWN_IMMUTABLES.contains(source.getClass());
    528     }
    529 
    530     /**
    531      * Clones T if we can; otherwise returns null.
    532      *
    533      * @param <T>
    534      * @param source
    535      * @return
    536      */
    537     @SuppressWarnings("unchecked")
    538     private static <T> T clone(T source) {
    539         final Class<? extends Object> class1 = source.getClass();
    540         try {
    541             final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null);
    542             return (T) declaredMethod.invoke(source, (Object) null);
    543         } catch (Exception e) {
    544         }
    545         try {
    546             final Constructor<? extends Object> declaredMethod = class1.getConstructor((Class<?>) null);
    547             return (T) declaredMethod.newInstance((Object) null);
    548         } catch (Exception e) {
    549         }
    550         return null; // uncloneable
    551     }
    552 
    553     /**
    554      * Appends two strings, inserting separator if either is empty
    555      */
    556     public static String joinWithSeparation(String a, String separator, String b) {
    557         if (a.length() == 0) return b;
    558         if (b.length() == 0) return a;
    559         return a + separator + b;
    560     }
    561 
    562     /**
    563      * Appends two strings, inserting separator if either is empty. Modifies first map
    564      */
    565     public static Map<String, String> joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b) {
    566         for (Iterator<String> it = b.keySet().iterator(); it.hasNext();) {
    567             String key = it.next();
    568             String bvalue = b.get(key);
    569             String avalue = a.get(key);
    570             if (avalue != null) {
    571                 if (avalue.trim().equals(bvalue.trim())) continue;
    572                 bvalue = joinWithSeparation(avalue, separator, bvalue);
    573             }
    574             a.put(key, bvalue);
    575         }
    576         return a;
    577     }
    578 
    579     public static <T> String join(Collection<T> c, String separator) {
    580         return join(c, separator, null);
    581     }
    582 
    583     public static String join(Object[] c, String separator) {
    584         return join(c, separator, null);
    585     }
    586 
    587     public static <T> String join(Collection<T> c, String separator, Transform<T, String> transform) {
    588         StringBuffer output = new StringBuffer();
    589         boolean isFirst = true;
    590         for (T item : c) {
    591             if (isFirst) {
    592                 isFirst = false;
    593             } else {
    594                 output.append(separator);
    595             }
    596             output.append(transform != null ? transform.transform(item) : item == null ? item : item.toString());
    597         }
    598         return output.toString();
    599     }
    600 
    601     public static <T> String join(T[] c, String separator, Transform<T, String> transform) {
    602         return join(Arrays.asList(c), separator, transform);
    603     }
    604 
    605     /**
    606      * Utility like Arrays.asList()
    607      */
    608     @SuppressWarnings("unchecked")
    609     public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) {
    610         int from = 0, to = 1;
    611         if (reverse) {
    612             from = 1;
    613             to = 0;
    614         }
    615         for (int i = 0; i < source.length; ++i) {
    616             if (source[i].length != 2) {
    617                 throw new IllegalArgumentException("Source must be array of pairs of strings: "
    618                     + Arrays.asList(source[i]));
    619             }
    620             target.put((K) source[i][from], (V) source[i][to]);
    621         }
    622         return target;
    623     }
    624 
    625     public static <K, V> Map<K, V> asMap(Object[][] source) {
    626         return asMap(source, new HashMap<K, V>(), false);
    627     }
    628 
    629     /**
    630      * Returns the canonical name for a file.
    631      */
    632     public static String getCanonicalName(String file) {
    633         try {
    634             return new File(file).getCanonicalPath();
    635         } catch (Exception e) {
    636             return file;
    637         }
    638     }
    639 
    640     /**
    641      * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that are in the UnicodeSet,
    642      * Supplementary ranges, and escaping
    643      *
    644      * @param source
    645      *            The source set
    646      * @param escaper
    647      *            A transliterator that is used to escape the characters according to the requirements of the regex.
    648      * @return
    649      */
    650     public static String toRegex(UnicodeSet source) {
    651         return toRegex(source, null, false);
    652     }
    653 
    /**
     * Escaper used by toRegex when the caller supplies none. Built from two rules: the first puts a backslash
     * before each of - \ [ and ]; the second converts control characters and ASCII whitespace to hex.
     * "foo" is only the ID given to the rule-based transliterator.
     */
    private static final Transliterator DEFAULT_REGEX_ESCAPER = Transliterator.createFromRules(
        "foo",
        "([ \\- \\\\ \\[ \\] ]) > '\\' $1 ;"
            // + " ([:c:]) > &hex($1);"
            + " ([[:control:][[:z:]&[:ascii:]]]) > &hex($1);",
        Transliterator.FORWARD);
    660 
    661     /**
    662      * Convert a UnicodeSet into a string that can be embedded into a Regex.
    663      * Handles strings that are in the UnicodeSet, Supplementary ranges, and
    664      * escaping
    665      *
    666      * @param source
    667      *            The source set
    668      * @param escaper
    669      *            A transliterator that is used to escape the characters according
    670      *            to the requirements of the regex. The default puts a \\ before [, -,
    671      *            \, and ], and converts controls and Ascii whitespace to hex.
    672      *            Alternatives can be supplied. Note that some Regex engines,
    673      *            including Java 1.5, don't really deal with escaped supplementaries
    674      *            well.
    675      * @param onlyBmp
    676      *            Set to true if the Regex only accepts BMP characters. In that
    677      *            case, ranges of supplementary characters are converted to lists of
    678      *            ranges. For example, [\uFFF0-\U0010000F \U0010100F-\U0010300F]
    679      *            converts into:
    680      *
    681      *            <pre>
    682      *          [\uD800][\uDC00-\uDFFF]
    683      *          [\uD801-\uDBBF][\uDC00-\uDFFF]
    684      *          [\uDBC0][\uDC00-\uDC0F]
    685      * </pre>
    686      *
    687      *            and
    688      *
    689      *            <pre>
    690      *          [\uDBC4][\uDC0F-\uDFFF]
    691      *          [\uDBC5-\uDBCB][\uDC00-\uDFFF]
    692      *          [\uDBCC][\uDC00-\uDC0F]
    693      * </pre>
    694      *
    695      *            These are then coalesced into a list of alternatives by sharing
    696      *            parts where feasible. For example, the above turns into 3 pairs of ranges:
    697      *
    698      *            <pre>
    699      *          [\uDBC0\uDBCC][\uDC00-\uDC0F]|\uDBC4[\uDC0F-\uDFFF]|[\uD800-\uDBBF\uDBC5-\uDBCB][\uDC00-\uDFFF]
    700      * </pre>
    701      *
    702      * @return escaped string. Something like [a-z] or (?:[a-m]|{zh}) if there is
    703      *         a string zh in the set, or a more complicated case for
    704      *         supplementaries. <br>
    705      *         Special cases: [] returns "", single item returns a string
    706      *         (escaped), like [a] => "a", or [{abc}] => "abc"<br>
    707      *         Supplementaries are handled specially, as described under onlyBmp.
    708      */
    709     public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) {
    710         if (escaper == null) {
    711             escaper = DEFAULT_REGEX_ESCAPER;
    712         }
    713         UnicodeSetIterator it = new UnicodeSetIterator(source);
    714         // if there is only one item, return it
    715         if (source.size() == 0) {
    716             return "";
    717         }
    718         if (source.size() == 1) {
    719             it.next();
    720             return escaper.transliterate(it.getString());
    721         }
    722         // otherwise, we figure out what is in the set, and will return
    723         StringBuilder base = new StringBuilder("[");
    724         StringBuilder alternates = new StringBuilder();
    725         Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<UnicodeSet, UnicodeSet>(new UnicodeSetComparator());
    726         int alternateCount = 0;
    727         while (it.nextRange()) {
    728             if (it.codepoint == UnicodeSetIterator.IS_STRING) {
    729                 ++alternateCount;
    730                 alternates.append('|').append(escaper.transliterate(it.string));
    731             } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP
    732                 addBmpRange(it.codepoint, it.codepointEnd, escaper, base);
    733             } else { // supplementary
    734                 if (it.codepoint <= 0xFFFF) {
    735                     addBmpRange(it.codepoint, 0xFFFF, escaper, base);
    736                     it.codepoint = 0x10000; // reset the range
    737                 }
    738                 // this gets a bit ugly; we are trying to minimize the extra ranges for supplementaries
    739                 // we do this by breaking up X-Y based on the Lead and Trail values for X and Y
    740                 // Lx [Tx - Ty]) (if Lx == Ly)
    741                 // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1)
    742                 // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise)
    743                 int leadX = UTF16.getLeadSurrogate(it.codepoint);
    744                 int trailX = UTF16.getTrailSurrogate(it.codepoint);
    745                 int leadY = UTF16.getLeadSurrogate(it.codepointEnd);
    746                 int trailY = UTF16.getTrailSurrogate(it.codepointEnd);
    747                 if (leadX == leadY) {
    748                     addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst);
    749                 } else {
    750                     addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst);
    751                     if (leadX != leadY - 1) {
    752                         addSupplementalRange(leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst);
    753                     }
    754                     addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst);
    755                 }
    756             }
    757         }
    758         // add in the supplementary ranges
    759         if (lastToFirst.size() != 0) {
    760             for (UnicodeSet last : lastToFirst.keySet()) {
    761                 ++alternateCount;
    762                 alternates.append('|').append(toRegex(lastToFirst.get(last), escaper, onlyBmp))
    763                     .append(toRegex(last, escaper, onlyBmp));
    764             }
    765         }
    766         // Return the output. We separate cases in order to get the minimal extra apparatus
    767         base.append("]");
    768         if (alternateCount == 0) {
    769             return base.toString();
    770         } else if (base.length() > 2) {
    771             return "(?:" + base + "|" + alternates.substring(1) + ")";
    772         } else if (alternateCount == 1) {
    773             return alternates.substring(1);
    774         } else {
    775             return "(?:" + alternates.substring(1) + ")";
    776         }
    777     }
    778 
    779     private static void addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper,
    780         Map<UnicodeSet, UnicodeSet> lastToFirst) {
    781         System.out.println("\tadding: " + new UnicodeSet(leadX, leadY) + "\t" + new UnicodeSet(trailX, trailY));
    782         UnicodeSet last = new UnicodeSet(trailX, trailY);
    783         UnicodeSet first = lastToFirst.get(last);
    784         if (first == null) {
    785             lastToFirst.put(last, first = new UnicodeSet());
    786         }
    787         first.add(leadX, leadY);
    788     }
    789 
    790     private static void addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base) {
    791         base.append(escaper.transliterate(UTF16.valueOf(start)));
    792         if (start != limit) {
    793             base.append("-").append(escaper.transliterate(UTF16.valueOf(limit)));
    794         }
    795     }
    796 
    797     public static class UnicodeSetComparator implements Comparator<UnicodeSet> {
    798         public int compare(UnicodeSet o1, UnicodeSet o2) {
    799             return o1.compareTo(o2);
    800         }
    801     }
    802 
    803     public static class CollectionComparator<T extends Comparable<T>> implements Comparator<Collection<T>> {
    804         public int compare(Collection<T> o1, Collection<T> o2) {
    805             return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST);
    806         }
    807     }
    808 
    809     public static class ComparableComparator<T extends Comparable<T>> implements Comparator<T> {
    810         public int compare(T arg0, T arg1) {
    811             return Utility.checkCompare(arg0, arg1);
    812         }
    813     }
    814 
    815     @SuppressWarnings({ "rawtypes", "unchecked" })
    816     public static void addTreeMapChain(Map coverageData, Object... objects) {
    817         Map<Object, Object> base = coverageData;
    818         for (int i = 0; i < objects.length - 2; ++i) {
    819             Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]);
    820             if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<Object, Object>());
    821             base = nextOne;
    822         }
    823         base.put(objects[objects.length - 2], objects[objects.length - 1]);
    824     }
    825 
    826     public static abstract class CollectionTransform<S, T> implements Transform<S, T> {
    827         public abstract T transform(S source);
    828 
    829         public Collection<T> transform(Collection<S> input, Collection<T> output) {
    830             return CldrUtility.transform(input, this, output);
    831         }
    832 
    833         public Collection<T> transform(Collection<S> input) {
    834             return transform(input, new ArrayList<T>());
    835         }
    836     }
    837 
    838     public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform(SC source, Transform<S, T> transform, TC target) {
    839         for (S sourceItem : source) {
    840             T targetItem = transform.transform(sourceItem);
    841             if (targetItem != null) {
    842                 target.add(targetItem);
    843             }
    844         }
    845         return target;
    846     }
    847 
    848     public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform(
    849         SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target) {
    850         for (Entry<SK, SV> sourceEntry : source.entrySet()) {
    851             TK targetKey = transformKey.transform(sourceEntry.getKey());
    852             TV targetValue = transformValue.transform(sourceEntry.getValue());
    853             if (targetKey != null && targetValue != null) {
    854                 target.put(targetKey, targetValue);
    855             }
    856         }
    857         return target;
    858     }
    859 
    860     public static abstract class Apply<T> {
    861         public abstract void apply(T item);
    862 
    863         public <U extends Collection<T>> void applyTo(U collection) {
    864             for (T item : collection) {
    865                 apply(item);
    866             }
    867         }
    868     }
    869 
    870     public static abstract class Filter<T> {
    871 
    872         public abstract boolean contains(T item);
    873 
    874         public <U extends Collection<T>> U retainAll(U c) {
    875             for (Iterator<T> it = c.iterator(); it.hasNext();) {
    876                 if (!contains(it.next())) it.remove();
    877             }
    878             return c;
    879         }
    880 
    881         public <U extends Collection<T>> U extractMatches(U c, U target) {
    882             for (Iterator<T> it = c.iterator(); it.hasNext();) {
    883                 T item = it.next();
    884                 if (contains(item)) {
    885                     target.add(item);
    886                 }
    887             }
    888             return target;
    889         }
    890 
    891         public <U extends Collection<T>> U removeAll(U c) {
    892             for (Iterator<T> it = c.iterator(); it.hasNext();) {
    893                 if (contains(it.next())) it.remove();
    894             }
    895             return c;
    896         }
    897 
    898         public <U extends Collection<T>> U extractNonMatches(U c, U target) {
    899             for (Iterator<T> it = c.iterator(); it.hasNext();) {
    900                 T item = it.next();
    901                 if (!contains(item)) {
    902                     target.add(item);
    903                 }
    904             }
    905             return target;
    906         }
    907     }
    908 
    909     public static class MatcherFilter<T> extends Filter<T> {
    910         private Matcher matcher;
    911 
    912         public MatcherFilter(String pattern) {
    913             this.matcher = PatternCache.get(pattern).matcher("");
    914         }
    915 
    916         public MatcherFilter(Matcher matcher) {
    917             this.matcher = matcher;
    918         }
    919 
    920         public MatcherFilter<T> set(Matcher matcher) {
    921             this.matcher = matcher;
    922             return this;
    923         }
    924 
    925         public MatcherFilter<T> set(String pattern) {
    926             this.matcher = PatternCache.get(pattern).matcher("");
    927             return this;
    928         }
    929 
    930         public boolean contains(T o) {
    931             return matcher.reset(o.toString()).matches();
    932         }
    933     }
    934 
    935     // static final class HandlingTransform implements Transform<String, Handling> {
    936     // @Override
    937     // public Handling transform(String source) {
    938     // return Handling.valueOf(source);
    939     // }
    940     // }
    941 
    942     public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>> implements java.util.Comparator<Pair<K, V>> {
    943 
    944         private Comparator<K> comp1;
    945         private Comparator<V> comp2;
    946 
    947         public PairComparator(Comparator<K> comp1, Comparator<V> comp2) {
    948             this.comp1 = comp1;
    949             this.comp2 = comp2;
    950         }
    951 
    952         @Override
    953         public int compare(Pair<K, V> o1, Pair<K, V> o2) {
    954             {
    955                 K o1First = o1.getFirst();
    956                 K o2First = o2.getFirst();
    957                 int diff = o1First == null ? (o2First == null ? 0 : -1)
    958                     : o2First == null ? 1
    959                         : comp1 == null ? o1First.compareTo(o2First)
    960                             : comp1.compare(o1First, o2First);
    961                 if (diff != 0) {
    962                     return diff;
    963                 }
    964             }
    965             V o1Second = o1.getSecond();
    966             V o2Second = o2.getSecond();
    967             return o1Second == null ? (o2Second == null ? 0 : -1)
    968                 : o2Second == null ? 1
    969                     : comp2 == null ? o1Second.compareTo(o2Second)
    970                         : comp2.compare(o1Second, o2Second);
    971         }
    972 
    973     }
    974 
    975     /**
    976      * Fetch data from jar
    977      *
    978      * @param name
    979      *            a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break.
    980      */
    981     public static BufferedReader getUTF8Data(String name) {
    982         if (new File(name).isAbsolute()) {
    983             throw new IllegalArgumentException(
    984                 "Path must be relative to org/unicode/cldr/util/data  such as 'file.txt' or 'casing/file.txt', but got '"
    985                     + name + "'.");
    986         }
    987 
    988         return FileReaders.openFile(CldrUtility.class, "data/" + name);
    989     }
    990 
    991     /**
    992      * Fetch data from jar
    993      *
    994      * @param name
    995      *            a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break.
    996      */
    997     public static InputStream getInputStream(String name) {
    998         if (new File(name).isAbsolute()) {
    999             throw new IllegalArgumentException(
   1000                 "Path must be relative to org/unicode/cldr/util/data  such as 'file.txt' or 'casing/file.txt', but got '"
   1001                     + name + "'.");
   1002         }
   1003         return getInputStream(CldrUtility.class, "data/" + name);
   1004     }
   1005 
   1006     @SuppressWarnings("resource")
   1007     public static InputStream getInputStream(Class<?> callingClass, String relativePath) {
   1008         InputStream is = callingClass.getResourceAsStream(relativePath);
   1009         // add buffering
   1010         return InputStreamFactory.buffer(is);
   1011     }
   1012 
   1013     /**
   1014      * Takes a Map that goes from Object to Set, and fills in the transpose
   1015      *
   1016      * @param source_key_valueSet
   1017      * @param output_value_key
   1018      */
   1019     public static void putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) {
   1020         for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext();) {
   1021             Object key = it.next();
   1022             Set<Object> values = source_key_valueSet.get(key);
   1023             for (Iterator<Object> it2 = values.iterator(); it2.hasNext();) {
   1024                 Object value = it2.next();
   1025                 output_value_key.put(value, key);
   1026             }
   1027         }
   1028     }
   1029 
   1030     public static int countInstances(String source, String substring) {
   1031         int count = 0;
   1032         int pos = 0;
   1033         while (true) {
   1034             pos = source.indexOf(substring, pos) + 1;
   1035             if (pos <= 0) break;
   1036             count++;
   1037         }
   1038         return count;
   1039     }
   1040 
   1041     public static void registerTransliteratorFromFile(String id, String dir, String filename) {
   1042         registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true);
   1043         registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true);
   1044     }
   1045 
   1046     public static void registerTransliteratorFromFile(String id, String dir, String filename, int direction,
   1047         boolean reverseID) {
   1048         if (filename == null) {
   1049             filename = id.replace('-', '_');
   1050             filename = filename.replace('/', '_');
   1051             filename += ".txt";
   1052         }
   1053         String rules = getText(dir, filename);
   1054         Transliterator t;
   1055         int pos = id.indexOf('-');
   1056         String rid;
   1057         if (pos < 0) {
   1058             rid = id + "-Any";
   1059             id = "Any-" + id;
   1060         } else {
   1061             rid = id.substring(pos + 1) + "-" + id.substring(0, pos);
   1062         }
   1063         if (!reverseID) rid = id;
   1064 
   1065         if (direction == Transliterator.FORWARD) {
   1066             Transliterator.unregister(id);
   1067             t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
   1068             Transliterator.registerInstance(t);
   1069             System.out.println("Registered new Transliterator: " + id);
   1070         }
   1071 
   1072         /*
   1073          * String test = "\u049A\u0430\u0437\u0430\u049B";
   1074          * System.out.println(t.transliterate(test));
   1075          * t = Transliterator.getInstance(id);
   1076          * System.out.println(t.transliterate(test));
   1077          */
   1078 
   1079         if (direction == Transliterator.REVERSE) {
   1080             Transliterator.unregister(rid);
   1081             t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
   1082             Transliterator.registerInstance(t);
   1083             System.out.println("Registered new Transliterator: " + rid);
   1084         }
   1085     }
   1086 
   1087     public static String getText(String dir, String filename) {
   1088         try {
   1089             BufferedReader br = FileUtilities.openUTF8Reader(dir, filename);
   1090             StringBuffer buffer = new StringBuffer();
   1091             while (true) {
   1092                 String line = br.readLine();
   1093                 if (line == null) break;
   1094                 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
   1095                 if (line.startsWith("//")) continue;
   1096                 buffer.append(line).append(CldrUtility.LINE_SEPARATOR);
   1097             }
   1098             br.close();
   1099             String rules = buffer.toString();
   1100             return rules;
   1101         } catch (IOException e) {
   1102             throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + filename)
   1103                 .initCause(e);
   1104         }
   1105     }
   1106 
   1107     public static void callMethod(String methodNames, Class<?> cls) {
   1108         for (String methodName : methodNames.split(",")) {
   1109             try {
   1110                 Method method;
   1111                 try {
   1112                     method = cls.getMethod(methodName, (Class[]) null);
   1113                     try {
   1114                         method.invoke(null, (Object[]) null);
   1115                     } catch (Exception e) {
   1116                         e.printStackTrace();
   1117                     }
   1118                 } catch (Exception e) {
   1119                     System.out.println("No such method: " + methodName);
   1120                     showMethods(cls);
   1121                 }
   1122             } catch (ClassNotFoundException e) {
   1123                 e.printStackTrace();
   1124             }
   1125         }
   1126     }
   1127 
   1128     public static void showMethods(Class<?> cls) throws ClassNotFoundException {
   1129         System.out.println("Possible methods of " + cls.getCanonicalName() + " are: ");
   1130         Method[] methods = cls.getMethods();
   1131         Set<String> names = new TreeSet<String>();
   1132         for (int i = 0; i < methods.length; ++i) {
   1133             if (methods[i].getGenericParameterTypes().length != 0) continue;
   1134             //int mods = methods[i].getModifiers();
   1135             // if (!Modifier.isStatic(mods)) continue;
   1136             String name = methods[i].getName();
   1137             names.add(name);
   1138         }
   1139         for (Iterator<String> it = names.iterator(); it.hasNext();) {
   1140             System.out.println("\t" + it.next());
   1141         }
   1142     }
   1143 
   1144     /**
   1145      * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before matcher.
   1146      *
   1147      * @param input
   1148      * @param separator
   1149      * @param matcher
   1150      *            must match each possible item. The first group is significant; if different, will cause break
   1151      * @return
   1152      */
   1153     static public String breakLines(CharSequence input, String separator, Matcher matcher, int width) {
   1154         StringBuffer output = new StringBuffer();
   1155         String lastPrefix = "";
   1156         int lastEnd = 0;
   1157         int lastBreakPos = 0;
   1158         matcher.reset(input);
   1159         while (true) {
   1160             boolean match = matcher.find();
   1161             if (!match) {
   1162                 output.append(input.subSequence(lastEnd, input.length()));
   1163                 break;
   1164             }
   1165             String prefix = matcher.group(1);
   1166             if (!prefix.equalsIgnoreCase(lastPrefix) || matcher.end() - lastBreakPos > width) { // break before?
   1167                 output.append(separator);
   1168                 lastBreakPos = lastEnd;
   1169             } else if (lastEnd != 0) {
   1170                 output.append(' ');
   1171             }
   1172             output.append(input.subSequence(lastEnd, matcher.end()).toString().trim());
   1173             lastEnd = matcher.end();
   1174             lastPrefix = prefix;
   1175         }
   1176         return output.toString();
   1177     }
   1178 
   1179     public static void showOptions(String[] args) {
   1180         // Properties props = System.getProperties();
   1181         System.out.println("Arguments: " + join(args, " ")); // + (props == null ? "" : " " + props));
   1182     }
   1183 
   1184     public static double roundToDecimals(double input, int places) {
   1185         double log10 = Math.log10(input); // 15000 => 4.xxx
   1186         double intLog10 = Math.floor(log10);
   1187         double scale = Math.pow(10, intLog10 - places + 1);
   1188         double factored = Math.round(input / scale) * scale;
   1189         // System.out.println("###\t" +input + "\t" + factored);
   1190         return factored;
   1191     }
   1192 
   1193     /**
   1194      * Get a property value, returning the value if there is one (eg -Dkey=value),
   1195      * otherwise the default value (for either empty or null).
   1196      *
   1197      * @param key
   1198      * @param valueIfNull
   1199      * @param valueIfEmpty
   1200      * @return
   1201      */
   1202     public static String getProperty(String key, String defaultValue) {
   1203         return getProperty(key, defaultValue, defaultValue);
   1204     }
   1205 
   1206     /**
   1207      * Get a property value, returning the value if there is one, otherwise null.
   1208      */
   1209     public static String getProperty(String key) {
   1210         return getProperty(key, null, null);
   1211     }
   1212 
   1213     /**
   1214      * Get a property value, returning the value if there is one (eg -Dkey=value),
   1215      * the valueIfEmpty if there is one with no value (eg -Dkey) and the valueIfNull
   1216      * if there is no property.
   1217      *
   1218      * @param key
   1219      * @param valueIfNull
   1220      * @param valueIfEmpty
   1221      * @return
   1222      */
   1223     public static String getProperty(String key, String valueIfNull, String valueIfEmpty) {
   1224         String result = CLDRConfig.getInstance().getProperty(key);
   1225         if (result == null) {
   1226             result = valueIfNull;
   1227         } else if (result.length() == 0) {
   1228             result = valueIfEmpty;
   1229         }
   1230         return result;
   1231     }
   1232 
   1233     public static String hex(byte[] bytes, int start, int end, String separator) {
   1234         StringBuilder result = new StringBuilder();
   1235         for (int i = 0; i < end; ++i) {
   1236             if (result.length() != 0) {
   1237                 result.append(separator);
   1238             }
   1239             result.append(Utility.hex(bytes[i] & 0xFF, 2));
   1240         }
   1241         return result.toString();
   1242     }
   1243 
   1244     public static boolean getProperty(String string, boolean b) {
   1245         return getProperty(string, b ? "true" : "false", "true").matches("(?i)T|TRUE");
   1246     }
   1247 
   1248     public static String checkValidDirectory(String sourceDirectory) {
   1249         return checkValidFile(sourceDirectory, true, null);
   1250     }
   1251 
   1252     public static String checkValidDirectory(String sourceDirectory, String correction) {
   1253         return checkValidFile(sourceDirectory, true, correction);
   1254     }
   1255 
   1256     public static String checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction) {
   1257         File file = null;
   1258         String canonicalPath = null;
   1259         try {
   1260             file = new File(sourceDirectory);
   1261             canonicalPath = file.getCanonicalPath() + File.separatorChar;
   1262         } catch (Exception e) {
   1263         }
   1264         if (file == null || canonicalPath == null || checkForDirectory && !file.isDirectory()) {
   1265             throw new RuntimeException("Directory not found: " + sourceDirectory
   1266                 + (canonicalPath == null ? "" : " => " + canonicalPath)
   1267                 + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction));
   1268         }
   1269         return canonicalPath;
   1270     }
   1271 
   1272     /**
   1273      * Copy up to matching line (not included). If output is null, then just skip until.
   1274      *
   1275      * @param oldFile
   1276      *            file to copy
   1277      * @param readUntilPattern
   1278      *            pattern to search for. If null, goes to end of file.
   1279      * @param output
   1280      *            into to copy into. If null, just skips in the input.
   1281      * @param includeMatchingLine
   1282      *            inclde the matching line when copying.
   1283      * @throws IOException
   1284      */
   1285     public static void copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern,
   1286         final PrintWriter output, boolean includeMatchingLine) throws IOException {
   1287         Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher("");
   1288         while (true) {
   1289             String line = oldFile.readLine();
   1290             if (line == null) {
   1291                 break;
   1292             }
   1293             if (line.startsWith("\uFEFF")) {
   1294                 line = line.substring(1);
   1295             }
   1296             if (readUntil != null && readUntil.reset(line).matches()) {
   1297                 if (includeMatchingLine && output != null) {
   1298                     output.println(line);
   1299                 }
   1300                 break;
   1301             }
   1302             if (output != null) {
   1303                 output.println(line);
   1304             }
   1305         }
   1306     }
   1307 
   1308     private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'");
   1309     private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd");
   1310     static {
   1311         df.setTimeZone(TimeZone.getTimeZone("GMT"));
   1312         DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT"));
   1313     }
   1314 
   1315     public static String isoFormat(Date date) {
   1316         synchronized (df) {
   1317             return df.format(date);
   1318         }
   1319     }
   1320 
   1321     public static String isoFormatDateOnly(Date date) {
   1322         synchronized (DATE_ONLY) {
   1323             return DATE_ONLY.format(date);
   1324         }
   1325     }
   1326 
   1327     public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() {
   1328         // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/
   1329         return new ConcurrentHashMap<K, V>(4, 0.9f, 1);
   1330     }
   1331 
   1332     public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) {
   1333         ConcurrentHashMap<K, V> result = newConcurrentHashMap();
   1334         result.putAll(source);
   1335         return result;
   1336     }
   1337 
   1338     public static boolean equals(Object a, Object b) {
   1339         return a == b ? true
   1340             : a == null || b == null ? false
   1341                 : a.equals(b);
   1342     }
   1343 
   1344     public static String getDoubleLink(String code) {
   1345         final String anchorSafe = TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_");
   1346         return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>";
   1347     }
   1348 
   1349     public static String getDoubleLinkedText(String anchor, String anchorText) {
   1350         return getDoubleLink(anchor) + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ")
   1351             + "</a>";
   1352     }
   1353 
   1354     public static String getDoubleLinkedText(String anchor) {
   1355         return getDoubleLinkedText(anchor, anchor);
   1356     }
   1357 
   1358     public static String getDoubleLinkMsg() {
   1359         return "<a name=''{0}'' href=''#{0}''>{0}</a>";
   1360     }
   1361 
   1362     public static String getDoubleLinkMsg2() {
   1363         return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>";
   1364     }
   1365 
   1366     public static String getCopyrightString() {
   1367         // now do the rest
   1368         return "Copyright \u00A9 1991-" + Calendar.getInstance().get(Calendar.YEAR) + " Unicode, Inc." + CldrUtility.LINE_SEPARATOR
   1369             + "For terms of use, see http://www.unicode.org/copyright.html" + CldrUtility.LINE_SEPARATOR
   1370             + "Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries." + CldrUtility.LINE_SEPARATOR
   1371             + "CLDR data files are interpreted according to the LDML specification " + "(http://unicode.org/reports/tr35/)";
   1372     }
   1373 
   1374     // TODO Move to collection utilities
   1375     /**
   1376      * Type-safe get
   1377      * @param map
   1378      * @param key
   1379      * @return value
   1380      */
   1381     public static <K, V, M extends Map<K, V>> V get(M map, K key) {
   1382         return map.get(key);
   1383     }
   1384 
   1385     /**
   1386      * Type-safe contains
   1387      * @param map
   1388      * @param key
   1389      * @return value
   1390      */
   1391     public static <K, C extends Collection<K>> boolean contains(C collection, K key) {
   1392         return collection.contains(key);
   1393     }
   1394 
   1395     public static <E extends Enum<E>> EnumSet<E> toEnumSet(Class<E> classValue, Collection<String> stringValues) {
   1396         EnumSet<E> result = EnumSet.noneOf(classValue);
   1397         for (String s : stringValues) {
   1398             result.add(Enum.valueOf(classValue, s));
   1399         }
   1400         return result;
   1401     }
   1402 
   1403     public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) {
   1404         if (!map.containsKey(key)) {
   1405             map.put(key, value);
   1406         }
   1407         return map;
   1408     }
   1409 
   1410     public static String[] cleanSemiFields(String line) {
   1411         line = cleanLine(line);
   1412         return line.isEmpty() ? null : SEMI_SPLIT.split(line);
   1413     }
   1414 
   1415     private static String cleanLine(String line) {
   1416         int comment = line.indexOf("#");
   1417         if (comment >= 0) {
   1418             line = line.substring(0, comment);
   1419         }
   1420         if (line.startsWith("\uFEFF")) {
   1421             line = line.substring(1);
   1422         }
   1423         return line.trim();
   1424     }
   1425 
   1426     public static void handleFile(String filename, LineHandler handler) throws IOException {
   1427         try (BufferedReader in = getUTF8Data(filename);) {
   1428             String line = null;
   1429             while ((line = in.readLine()) != null) {
   1430                 //                String line = in.readLine();
   1431                 //                if (line == null) {
   1432                 //                    break;
   1433                 //                }
   1434                 try {
   1435                     if (!handler.handle(line)) {
   1436                         if (HANDLEFILE_SHOW_SKIP) {
   1437                             System.out.println("Skipping line: " + line);
   1438                         }
   1439                     }
   1440                 } catch (Exception e) {
   1441                     throw (RuntimeException) new IllegalArgumentException("Problem with line: " + line)
   1442                         .initCause(e);
   1443                 }
   1444             }
   1445         }
   1446         //        in.close();
   1447     }
   1448 
   1449     public static <T> T ifNull(T x, T y) {
   1450         return x == null
   1451             ? y
   1452             : x;
   1453     }
   1454 
   1455     public static <T> T ifSame(T source, T replaceIfSame, T replacement) {
   1456         return source == replaceIfSame ? replacement : source;
   1457     }
   1458 
   1459     public static <T> T ifEqual(T source, T replaceIfSame, T replacement) {
   1460         return Objects.equals(source, replaceIfSame) ? replacement : source;
   1461     }
   1462 
   1463     public static <T> Set<T> intersect(Set<T> a, Collection<T> b) {
   1464         Set<T> result = new LinkedHashSet<>(a);
   1465         result.retainAll(b);
   1466         return result;
   1467     }
   1468 
   1469     public static <T> Set<T> subtract(Set<T> a, Collection<T> b) {
   1470         Set<T> result = new LinkedHashSet<>(a);
   1471         result.removeAll(b);
   1472         return result;
   1473     }
   1474 
   1475     public static <T> void logRegexLookup(TestFmwk testFramework, RegexLookup<T> lookup, String toLookup) {
   1476         Output<String[]> arguments = new Output<>();
   1477         Output<Finder> matcherFound = new Output<>();
   1478         List<String> failures = new ArrayList<String>();
   1479         lookup.get(toLookup, null, arguments, matcherFound, failures);
   1480         testFramework.logln("lookup arguments: " + (arguments.value == null ? "null" : Arrays.asList(arguments.value)));
   1481         testFramework.logln("lookup matcherFound: " + matcherFound);
   1482         for (String s : failures) {
   1483             testFramework.logln(s);
   1484         }
   1485     }
   1486 
   1487     public static boolean deepEquals(Object... pairs) {
   1488         for (int item = 0; item < pairs.length;) {
   1489             if (!Objects.deepEquals(pairs[item++], pairs[item++])) {
   1490                 return false;
   1491             }
   1492         }
   1493         return true;
   1494     }
   1495 
   1496     public static String[] array(Splitter splitter, String source) {
   1497         List<String> list = splitter.splitToList(source);
   1498         return list.toArray(new String[list.size()]);
   1499     }
   1500 
   1501     public static String toHex(String in, boolean javaStyle) {
   1502         StringBuilder result = new StringBuilder();
   1503         for (int i = 0; i < in.length(); ++i) {
   1504             result.append(toHex(in.charAt(i), javaStyle));
   1505         }
   1506         return result.toString();
   1507     }
   1508 
   1509     public static String toHex(int j, boolean javaStyle) {
   1510         if (j == '\"') {
   1511             return "\\\"";
   1512         } else if (j == '\\') {
   1513             return "\\\\";
   1514         } else if (0x20 < j && j < 0x7F) {
   1515             return String.valueOf((char) j);
   1516         }
   1517         final String hexString = Integer.toHexString(j).toUpperCase();
   1518         int gap = 4 - hexString.length();
   1519         if (gap < 0) {
   1520             gap = 0;
   1521         }
   1522         String prefix = javaStyle ? "\\u" : "U+";
   1523         return prefix + "000".substring(0, gap) + hexString;
   1524     }
   1525 
   1526     /**
   1527      * get string format for debugging, since Java has a useless display for many items
   1528      * @param item
   1529      * @return
   1530      */
   1531     public static String toString(Object item) {
   1532         if (item instanceof Object[]) {
   1533             return toString(Arrays.asList((Object[]) item));
   1534         } else if (item instanceof Entry) {
   1535             return toString(((Entry) item).getKey()) + "" + toString(((Entry) item).getValue());
   1536         } else if (item instanceof Map) {
   1537             return "{" + toString(((Map) item).entrySet()) + "}";
   1538         } else if (item instanceof Collection) {
   1539             List<String> result = new ArrayList<>();
   1540             for (Object subitem : (Collection) item) {
   1541                 result.add(toString(subitem));
   1542             }
   1543             return result.toString();
   1544         }
   1545         return item.toString();
   1546     }
   1547 }
   1548