1 package org.unicode.cldr.tool; 2 3 import java.util.EnumSet; 4 import java.util.Objects; 5 import java.util.Set; 6 import java.util.TreeSet; 7 import java.util.regex.Matcher; 8 import java.util.regex.Pattern; 9 10 import org.unicode.cldr.test.CoverageLevel2; 11 import org.unicode.cldr.tool.Option.Options; 12 import org.unicode.cldr.util.CLDRConfig; 13 import org.unicode.cldr.util.CLDRFile; 14 import org.unicode.cldr.util.CLDRFile.Status; 15 import org.unicode.cldr.util.CLDRPaths; 16 import org.unicode.cldr.util.Counter; 17 import org.unicode.cldr.util.Factory; 18 import org.unicode.cldr.util.Level; 19 import org.unicode.cldr.util.PathHeader; 20 import org.unicode.cldr.util.PathHeader.BaseUrl; 21 import org.unicode.cldr.util.PatternCache; 22 import org.unicode.cldr.util.StandardCodes; 23 24 import com.google.common.collect.ImmutableSet; 25 import com.ibm.icu.dev.util.CollectionUtilities; 26 import com.ibm.icu.util.Output; 27 28 public class SearchCLDR { 29 // private static final int 30 // HELP1 = 0, 31 // HELP2 = 1, 32 // SOURCEDIR = 2, 33 // MATCH_FILE = 3, 34 // MATCH_PATH = 4, 35 // MATCH_VALUE = 5, 36 // SHOW_PATH = 6, 37 // SHOW_PARENT_VALUE = 7, 38 // SHOW_ENGLISH_VALUE = 8 39 // ; 40 // private static final UOption[] options = { 41 // UOption.HELP_H(), 42 // UOption.HELP_QUESTION_MARK(), 43 // UOption.SOURCEDIR().setDefault(CldrUtility.MAIN_DIRECTORY), 44 // UOption.create("localematch", 'l', UOption.REQUIRES_ARG).setDefault(".*"), 45 // UOption.create("pathmatch", 'p', UOption.REQUIRES_ARG).setDefault(".*"), 46 // UOption.create("valuematch", 'v', UOption.REQUIRES_ARG).setDefault(".*"), 47 // UOption.create("showPath", 'z', UOption.NO_ARG), 48 // UOption.create("showParentValue", 'q', UOption.NO_ARG), 49 // UOption.create("showEnglishValue", 'e', UOption.NO_ARG), 50 // }; 51 // static final String HELP_TEXT1 = "Use the following options" + XPathParts.NEWLINE 52 // + "-h or -?\t for this message" + XPathParts.NEWLINE 53 // + "-"+options[SOURCEDIR].shortName + "\t source directory. Default = -s" + 54 // CldrUtility.getCanonicalName(CldrUtility.MAIN_DIRECTORY) + XPathParts.NEWLINE 55 // + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" + XPathParts.NEWLINE 56 // + "-l<regex>\t to restrict the locales to what matches <regex>" + XPathParts.NEWLINE 57 // + "-p<regex>\t to restrict the paths to what matches <regex>" + XPathParts.NEWLINE 58 // + "-v<regex>\t to restrict the values to what matches <regex>" + XPathParts.NEWLINE 59 // + "\t Remember to put .* on the front and back of any regex if you want to find any occurence." 60 // + "-s\t show path" 61 // + "-s\t show parent value" 62 // + "-s\t show English value" 63 // ; 64 65 final static Options myOptions = new Options() 66 .add("source", ".*", CLDRPaths.MAIN_DIRECTORY, "source directory") 67 .add("file", ".*", ".*", "regex to filter files/locales.") 68 .add("path", ".*", null, "regex to filter paths. ! in front selects items that don't match. example: -p relative.*@type=\\\"-?3\\\"") 69 .add("value", ".*", null, "regex to filter values. ! in front selects items that don't match") 70 .add("level", ".*", null, "regex to filter levels. ! in front selects items that don't match") 71 .add("count", null, null, "only count items") 72 .add("organization", ".*", null, "show level for organization") 73 .add("z-showPath", null, null, "show paths") 74 .add("resolved", null, null, "use resolved locales") 75 .add("q-showParent", null, null, "show parent value") 76 .add("english", null, null, "show english value") 77 .add("Verbose", null, null, "verbose output") 78 .add("PathHeader", null, null, "show path header and string ID"); 79 80 private static String fileMatcher; 81 private static Matcher pathMatcher; 82 private static boolean countOnly; 83 private static boolean showPath; 84 private static PathHeader.Factory PATH_HEADER_FACTORY = null; 85 86 private static String organization; 87 88 public static void main(String[] args) { 89 myOptions.parse(args, true); 90 // System.out.println("Arguments: " + CollectionUtilities.join(args, " ")); 91 92 long startTime = System.currentTimeMillis(); 93 94 String sourceDirectory = myOptions.get("source").getValue(); 95 96 Output<Boolean> exclude = new Output<Boolean>(); 97 fileMatcher = myOptions.get("file").getValue(); 98 99 pathMatcher = getMatcher(myOptions.get("path").getValue(), exclude); 100 Boolean pathExclude = exclude.value; 101 102 Set<Level> levelMatcher = getEnumMatcher(myOptions.get("level").getValue(), exclude); 103 104 Matcher valueMatcher = getMatcher(myOptions.get("value").getValue(), exclude); 105 Boolean valueExclude = exclude.value; 106 107 countOnly = myOptions.get("count").doesOccur(); 108 boolean resolved = myOptions.get("resolved").doesOccur(); 109 110 showPath = myOptions.get("z-showPath").doesOccur(); 111 organization = myOptions.get("organization").getValue(); 112 113 if (myOptions.get("PathHeader").doesOccur()) { 114 PATH_HEADER_FACTORY = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish()); 115 } 116 117 boolean showParent = myOptions.get("q-showParent").doesOccur(); 118 119 boolean showEnglish = myOptions.get("english").doesOccur(); 120 121 Factory cldrFactory = Factory.make(sourceDirectory, fileMatcher); 122 Set<String> locales = new TreeSet<String>(cldrFactory.getAvailable()); 123 124 CLDRFile english = cldrFactory.make("en", true); 125 PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(english); 126 127 System.out.println("Searching..."); 128 System.out.println(); 129 System.out.flush(); 130 // PrettyPath pretty = new PrettyPath(); 131 132 if (countOnly) { 133 System.out.print("file"); 134 for (Level cLevel : Level.values()) { 135 System.out.print("\t" + cLevel); 136 } 137 System.out.println(); 138 } 139 140 for (String locale : locales) { 141 Level organizationLevel = organization == null ? null 142 : StandardCodes.make().getLocaleCoverageLevel(organization, locale); 143 144 CLDRFile file = (CLDRFile) cldrFactory.make(locale, resolved); 145 146 Counter<Level> levelCounter = new Counter<Level>(); 147 //CLDRFile parent = null; 148 boolean headerShown = false; 149 150 // System.out.println("*Checking " + locale); 151 CoverageLevel2 level = null; 152 Level pathLevel = null; 153 154 level = CoverageLevel2.getInstance(locale); 155 Status status = new Status(); 156 Set<PathHeader> sorted = new TreeSet<PathHeader>(); 157 for (String path : file.fullIterable()) { 158 if (file.getStringValue(path) == null) { 159 continue; 160 } 161 sorted.add(pathHeaderFactory.fromPath(path)); 162 } 163 for (PathHeader pathHeader : sorted) { 164 String path = pathHeader.getOriginalPath(); 165 String fullPath = file.getFullXPath(path); 166 String value = file.getStringValue(path); 167 168 if (pathMatcher != null && pathExclude == pathMatcher.reset(fullPath).find()) { 169 continue; 170 } 171 172 { 173 pathLevel = level.getLevel(path); 174 levelCounter.add(pathLevel, 1); 175 } 176 177 if (!levelMatcher.contains(pathLevel)) { 178 continue; 179 } 180 181 if (valueMatcher != null && valueExclude == valueMatcher.reset(value).find()) { 182 continue; 183 } 184 185 // made it through the sieve 186 187 if (countOnly) { 188 continue; 189 } 190 if (!headerShown) { 191 showLine(showPath, showParent, showEnglish, resolved, locale, "Path", "Full-Path", "Value", 192 "PathHeader", "Parent-Value", "English-Value", "Source-Locale\tSource-Path", "Org-Level"); 193 headerShown = true; 194 } 195 // if (showParent && parent == null) { 196 // String parentLocale = LocaleIDParser.getParent(locale); 197 // parent = cldrFactory.make(parentLocale, true); 198 // } 199 // String shortPath = pretty.getPrettyPath(path); 200 // String cleanShort = pretty.getOutputForm(shortPath); 201 String cleanShort = pathHeader.toString().replace('\t', '|'); 202 final String resolvedSource = !resolved ? null 203 : file.getSourceLocaleID(path, status) 204 + (path.equals(status.pathWhereFound) ? "\t" : "\t" + status); 205 showLine(showPath, showParent, showEnglish, resolved, locale, 206 path, fullPath, value, 207 cleanShort, 208 !showParent ? null : english.getBaileyValue(path, null, null), 209 english == null ? null : english.getStringValue(path), 210 resolvedSource, 211 Objects.toString(pathLevel)); 212 } 213 if (countOnly) { 214 System.out.print(locale); 215 for (Level cLevel : Level.values()) { 216 System.out.print("\t" + levelCounter.get(cLevel)); 217 } 218 System.out.println(); 219 } 220 System.out.flush(); 221 } 222 System.out 223 .println("Done -- Elapsed time: " + ((System.currentTimeMillis() - startTime) / 60000.0) + " minutes"); 224 } 225 226 private static void showLine(boolean showPath, boolean showParent, boolean showEnglish, 227 boolean resolved, String locale, String path, String fullPath, String value, 228 String shortPath, String parentValue, String englishValue, String resolvedSource, String organizationLevel) { 229 String pathHeaderInfo = ""; 230 if (PATH_HEADER_FACTORY != null) { 231 PathHeader pathHeader = PATH_HEADER_FACTORY.fromPath(path); 232 if (pathHeader != null) { 233 pathHeaderInfo = "\n\t" + pathHeader 234 + "\n\t" + pathHeader.getUrl(BaseUrl.PRODUCTION, locale); 235 } 236 } 237 System.out.println( 238 locale + "\t" + value + "" 239 + (showEnglish ? "\t" + englishValue + "" : "") 240 + (!showParent ? "" : CollectionUtilities.equals(value, parentValue) ? "\t" : "\t" + parentValue + "") 241 + "\t" + shortPath 242 + (showPath ? "\t" + fullPath : "") 243 + (resolved ? "\t" + resolvedSource : "") 244 + (organizationLevel != null ? "\t" + organizationLevel : "") 245 + pathHeaderInfo); 246 } 247 248 private static Matcher getMatcher(String property, Output<Boolean> exclude) { 249 exclude.value = false; 250 if (property == null) { 251 return null; 252 } 253 if (property.startsWith("!")) { 254 exclude.value = true; 255 property = property.substring(1); 256 } 257 return PatternCache.get(property).matcher(""); 258 } 259 260 private static Set<Level> getEnumMatcher(String property, Output<Boolean> exclude) { 261 exclude.value = false; 262 if (property == null) { 263 return null; 264 } 265 if (property.startsWith("!")) { 266 exclude.value = true; 267 property = property.substring(1); 268 } 269 EnumSet<Level> result = EnumSet.noneOf(Level.class); 270 Matcher matcher = Pattern.compile(property, Pattern.CASE_INSENSITIVE).matcher(""); 271 272 for (Level level : Level.values()) { 273 if (matcher.reset(level.toString()).matches() != exclude.value) { 274 result.add(level); 275 } 276 } 277 return ImmutableSet.copyOf(result); 278 } 279 }