Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.util.EnumSet;
      4 import java.util.Objects;
      5 import java.util.Set;
      6 import java.util.TreeSet;
      7 import java.util.regex.Matcher;
      8 import java.util.regex.Pattern;
      9 
     10 import org.unicode.cldr.test.CoverageLevel2;
     11 import org.unicode.cldr.tool.Option.Options;
     12 import org.unicode.cldr.util.CLDRConfig;
     13 import org.unicode.cldr.util.CLDRFile;
     14 import org.unicode.cldr.util.CLDRFile.Status;
     15 import org.unicode.cldr.util.CLDRPaths;
     16 import org.unicode.cldr.util.Counter;
     17 import org.unicode.cldr.util.Factory;
     18 import org.unicode.cldr.util.Level;
     19 import org.unicode.cldr.util.PathHeader;
     20 import org.unicode.cldr.util.PathHeader.BaseUrl;
     21 import org.unicode.cldr.util.PatternCache;
     22 import org.unicode.cldr.util.StandardCodes;
     23 
     24 import com.google.common.collect.ImmutableSet;
     25 import com.ibm.icu.dev.util.CollectionUtilities;
     26 import com.ibm.icu.util.Output;
     27 
     28 public class SearchCLDR {
     29     // private static final int
     30     // HELP1 = 0,
     31     // HELP2 = 1,
     32     // SOURCEDIR = 2,
     33     // MATCH_FILE = 3,
     34     // MATCH_PATH = 4,
     35     // MATCH_VALUE = 5,
     36     // SHOW_PATH = 6,
     37     // SHOW_PARENT_VALUE = 7,
     38     // SHOW_ENGLISH_VALUE = 8
     39     // ;
     40     // private static final UOption[] options = {
     41     // UOption.HELP_H(),
     42     // UOption.HELP_QUESTION_MARK(),
     43     // UOption.SOURCEDIR().setDefault(CldrUtility.MAIN_DIRECTORY),
     44     // UOption.create("localematch", 'l', UOption.REQUIRES_ARG).setDefault(".*"),
     45     // UOption.create("pathmatch", 'p', UOption.REQUIRES_ARG).setDefault(".*"),
     46     // UOption.create("valuematch", 'v', UOption.REQUIRES_ARG).setDefault(".*"),
     47     // UOption.create("showPath", 'z', UOption.NO_ARG),
     48     // UOption.create("showParentValue", 'q', UOption.NO_ARG),
     49     // UOption.create("showEnglishValue", 'e', UOption.NO_ARG),
     50     // };
     51     // static final String HELP_TEXT1 = "Use the following options" + XPathParts.NEWLINE
     52     // + "-h or -?\t for this message" + XPathParts.NEWLINE
     53     // + "-"+options[SOURCEDIR].shortName + "\t source directory. Default = -s" +
     54     // CldrUtility.getCanonicalName(CldrUtility.MAIN_DIRECTORY) + XPathParts.NEWLINE
     55     // + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" + XPathParts.NEWLINE
     56     // + "-l<regex>\t to restrict the locales to what matches <regex>" + XPathParts.NEWLINE
     57     // + "-p<regex>\t to restrict the paths to what matches <regex>" + XPathParts.NEWLINE
     58     // + "-v<regex>\t to restrict the values to what matches <regex>" + XPathParts.NEWLINE
     59     // + "\t Remember to put .* on the front and back of any regex if you want to find any occurence."
     60     // + "-s\t show path"
     61     // + "-s\t show parent value"
     62     // + "-s\t show English value"
     63     // ;
     64 
     65     final static Options myOptions = new Options()
     66         .add("source", ".*", CLDRPaths.MAIN_DIRECTORY, "source directory")
     67         .add("file", ".*", ".*", "regex to filter files/locales.")
     68         .add("path", ".*", null, "regex to filter paths. ! in front selects items that don't match. example: -p relative.*@type=\\\"-?3\\\"")
     69         .add("value", ".*", null, "regex to filter values. ! in front selects items that don't match")
     70         .add("level", ".*", null, "regex to filter levels. ! in front selects items that don't match")
     71         .add("count", null, null, "only count items")
     72         .add("organization", ".*", null, "show level for organization")
     73         .add("z-showPath", null, null, "show paths")
     74         .add("resolved", null, null, "use resolved locales")
     75         .add("q-showParent", null, null, "show parent value")
     76         .add("english", null, null, "show english value")
     77         .add("Verbose", null, null, "verbose output")
     78         .add("PathHeader", null, null, "show path header and string ID");
     79 
     80     private static String fileMatcher;
     81     private static Matcher pathMatcher;
     82     private static boolean countOnly;
     83     private static boolean showPath;
     84     private static PathHeader.Factory PATH_HEADER_FACTORY = null;
     85 
     86     private static String organization;
     87 
     88     public static void main(String[] args) {
     89         myOptions.parse(args, true);
     90         // System.out.println("Arguments: " + CollectionUtilities.join(args, " "));
     91 
     92         long startTime = System.currentTimeMillis();
     93 
     94         String sourceDirectory = myOptions.get("source").getValue();
     95 
     96         Output<Boolean> exclude = new Output<Boolean>();
     97         fileMatcher = myOptions.get("file").getValue();
     98 
     99         pathMatcher = getMatcher(myOptions.get("path").getValue(), exclude);
    100         Boolean pathExclude = exclude.value;
    101 
    102         Set<Level> levelMatcher = getEnumMatcher(myOptions.get("level").getValue(), exclude);
    103 
    104         Matcher valueMatcher = getMatcher(myOptions.get("value").getValue(), exclude);
    105         Boolean valueExclude = exclude.value;
    106 
    107         countOnly = myOptions.get("count").doesOccur();
    108         boolean resolved = myOptions.get("resolved").doesOccur();
    109 
    110         showPath = myOptions.get("z-showPath").doesOccur();
    111         organization = myOptions.get("organization").getValue();
    112 
    113         if (myOptions.get("PathHeader").doesOccur()) {
    114             PATH_HEADER_FACTORY = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish());
    115         }
    116 
    117         boolean showParent = myOptions.get("q-showParent").doesOccur();
    118 
    119         boolean showEnglish = myOptions.get("english").doesOccur();
    120 
    121         Factory cldrFactory = Factory.make(sourceDirectory, fileMatcher);
    122         Set<String> locales = new TreeSet<String>(cldrFactory.getAvailable());
    123 
    124         CLDRFile english = cldrFactory.make("en", true);
    125         PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(english);
    126 
    127         System.out.println("Searching...");
    128         System.out.println();
    129         System.out.flush();
    130         // PrettyPath pretty = new PrettyPath();
    131 
    132         if (countOnly) {
    133             System.out.print("file");
    134             for (Level cLevel : Level.values()) {
    135                 System.out.print("\t" + cLevel);
    136             }
    137             System.out.println();
    138         }
    139 
    140         for (String locale : locales) {
    141             Level organizationLevel = organization == null ? null
    142                 : StandardCodes.make().getLocaleCoverageLevel(organization, locale);
    143 
    144             CLDRFile file = (CLDRFile) cldrFactory.make(locale, resolved);
    145 
    146             Counter<Level> levelCounter = new Counter<Level>();
    147             //CLDRFile parent = null;
    148             boolean headerShown = false;
    149 
    150             // System.out.println("*Checking " + locale);
    151             CoverageLevel2 level = null;
    152             Level pathLevel = null;
    153 
    154             level = CoverageLevel2.getInstance(locale);
    155             Status status = new Status();
    156             Set<PathHeader> sorted = new TreeSet<PathHeader>();
    157             for (String path : file.fullIterable()) {
    158                 if (file.getStringValue(path) == null) {
    159                     continue;
    160                 }
    161                 sorted.add(pathHeaderFactory.fromPath(path));
    162             }
    163             for (PathHeader pathHeader : sorted) {
    164                 String path = pathHeader.getOriginalPath();
    165                 String fullPath = file.getFullXPath(path);
    166                 String value = file.getStringValue(path);
    167 
    168                 if (pathMatcher != null && pathExclude == pathMatcher.reset(fullPath).find()) {
    169                     continue;
    170                 }
    171 
    172                 {
    173                     pathLevel = level.getLevel(path);
    174                     levelCounter.add(pathLevel, 1);
    175                 }
    176 
    177                 if (!levelMatcher.contains(pathLevel)) {
    178                     continue;
    179                 }
    180 
    181                 if (valueMatcher != null && valueExclude == valueMatcher.reset(value).find()) {
    182                     continue;
    183                 }
    184 
    185                 // made it through the sieve
    186 
    187                 if (countOnly) {
    188                     continue;
    189                 }
    190                 if (!headerShown) {
    191                     showLine(showPath, showParent, showEnglish, resolved, locale, "Path", "Full-Path", "Value",
    192                         "PathHeader", "Parent-Value", "English-Value", "Source-Locale\tSource-Path", "Org-Level");
    193                     headerShown = true;
    194                 }
    195                 //                if (showParent && parent == null) {
    196                 //                    String parentLocale = LocaleIDParser.getParent(locale);
    197                 //                    parent = cldrFactory.make(parentLocale, true);
    198                 //                }
    199                 // String shortPath = pretty.getPrettyPath(path);
    200                 // String cleanShort = pretty.getOutputForm(shortPath);
    201                 String cleanShort = pathHeader.toString().replace('\t', '|');
    202                 final String resolvedSource = !resolved ? null
    203                     : file.getSourceLocaleID(path, status)
    204                         + (path.equals(status.pathWhereFound) ? "\t" : "\t" + status);
    205                 showLine(showPath, showParent, showEnglish, resolved, locale,
    206                     path, fullPath, value,
    207                     cleanShort,
    208                     !showParent ? null : english.getBaileyValue(path, null, null),
    209                     english == null ? null : english.getStringValue(path),
    210                     resolvedSource,
    211                     Objects.toString(pathLevel));
    212             }
    213             if (countOnly) {
    214                 System.out.print(locale);
    215                 for (Level cLevel : Level.values()) {
    216                     System.out.print("\t" + levelCounter.get(cLevel));
    217                 }
    218                 System.out.println();
    219             }
    220             System.out.flush();
    221         }
    222         System.out
    223             .println("Done -- Elapsed time: " + ((System.currentTimeMillis() - startTime) / 60000.0) + " minutes");
    224     }
    225 
    226     private static void showLine(boolean showPath, boolean showParent, boolean showEnglish,
    227         boolean resolved, String locale, String path, String fullPath, String value,
    228         String shortPath, String parentValue, String englishValue, String resolvedSource, String organizationLevel) {
    229         String pathHeaderInfo = "";
    230         if (PATH_HEADER_FACTORY != null) {
    231             PathHeader pathHeader = PATH_HEADER_FACTORY.fromPath(path);
    232             if (pathHeader != null) {
    233                 pathHeaderInfo = "\n\t" + pathHeader
    234                     + "\n\t" + pathHeader.getUrl(BaseUrl.PRODUCTION, locale);
    235             }
    236         }
    237         System.out.println(
    238             locale + "\t" + value + ""
    239                 + (showEnglish ? "\t" + englishValue + "" : "")
    240                 + (!showParent ? "" : CollectionUtilities.equals(value, parentValue) ? "\t" : "\t" + parentValue + "")
    241                 + "\t" + shortPath
    242                 + (showPath ? "\t" + fullPath : "")
    243                 + (resolved ? "\t" + resolvedSource : "")
    244                 + (organizationLevel != null ? "\t" + organizationLevel : "")
    245                 + pathHeaderInfo);
    246     }
    247 
    248     private static Matcher getMatcher(String property, Output<Boolean> exclude) {
    249         exclude.value = false;
    250         if (property == null) {
    251             return null;
    252         }
    253         if (property.startsWith("!")) {
    254             exclude.value = true;
    255             property = property.substring(1);
    256         }
    257         return PatternCache.get(property).matcher("");
    258     }
    259 
    260     private static Set<Level> getEnumMatcher(String property, Output<Boolean> exclude) {
    261         exclude.value = false;
    262         if (property == null) {
    263             return null;
    264         }
    265         if (property.startsWith("!")) {
    266             exclude.value = true;
    267             property = property.substring(1);
    268         }
    269         EnumSet<Level> result = EnumSet.noneOf(Level.class);
    270         Matcher matcher = Pattern.compile(property, Pattern.CASE_INSENSITIVE).matcher("");
    271 
    272         for (Level level : Level.values()) {
    273             if (matcher.reset(level.toString()).matches() != exclude.value) {
    274                 result.add(level);
    275             }
    276         }
    277         return ImmutableSet.copyOf(result);
    278     }
    279 }