1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ********************************************************************** 5 * Copyright (c) 2001-2010, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Date Name Description 9 * 11/29/2001 aliu Creation. 10 * 06/26/2002 aliu Moved to com.ibm.icu.dev.tool.translit 11 ********************************************************************** 12 */ 13 package com.ibm.icu.dev.tool.translit; 14 import java.io.File; 15 import java.io.FileOutputStream; 16 import java.io.IOException; 17 import java.io.OutputStreamWriter; 18 import java.io.PrintWriter; 19 import java.util.Enumeration; 20 21 import com.ibm.icu.text.Normalizer; 22 import com.ibm.icu.text.Transliterator; 23 import com.ibm.icu.text.UnicodeSet; 24 25 /** 26 * Class that generates source set information for a transliterator. 27 * 28 * To run, use: 29 * 30 * java com.ibm.icu.dev.tool.translit.SourceSet Latin-Katakana NFD lower 31 * 32 * Output is produced in the command console, and a file with more detail is also written. 33 * 34 * To see if it works, use: 35 * 36 * java com.ibm.icu.dev.test.translit.TransliteratorTest -v -nothrow TestIncrementalProgress 37 * 38 * and 39 * 40 * java com.ibm.icu.dev.demo.translit.Demo 41 */ 42 public class SourceSet { 43 44 public static void main(String[] args) throws IOException { 45 if (args.length == 0) { 46 // Compute and display the source sets for all system 47 // transliterators. 48 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 49 String ID = (String) e.nextElement(); 50 showSourceSet(ID, Normalizer.NONE, false); 51 } 52 } else { 53 // Usage: ID [NFKD | NFD] [lower] 54 Normalizer.Mode m = Normalizer.NONE; 55 boolean lowerFirst = false; 56 if (args.length >= 2) { 57 if (args[1].equalsIgnoreCase("NFD")) { 58 m = Normalizer.NFD; 59 } else if (args[1].equalsIgnoreCase("NFKD")) { 60 m = Normalizer.NFKD; 61 } else { 62 usage(); 63 } 64 } 65 if (args.length >= 3) { 66 if (args[2].equalsIgnoreCase("lower")) { 67 lowerFirst = true; 68 } else { 69 usage(); 70 } 71 } 72 if (args.length > 3) { 73 usage(); 74 } 75 showSourceSet(args[0], m, lowerFirst); 76 } 77 } 78 79 static void showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst) throws IOException { 80 File f = new File("UnicodeSetClosure.txt"); 81 String filename = f.getCanonicalFile().toString(); 82 out = new PrintWriter( 83 new OutputStreamWriter( 84 new FileOutputStream(filename), "UTF-8")); 85 out.print('\uFEFF'); // BOM 86 System.out.println(); 87 System.out.println("Writing " + filename); 88 Transliterator t = Transliterator.getInstance(ID); 89 showSourceSetAux(t, m, lowerFirst, true); 90 showSourceSetAux(t.getInverse(), m, lowerFirst, false); 91 out.close(); 92 } 93 94 static PrintWriter out; 95 96 static void showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward) { 97 UnicodeSet sourceSet = t.getSourceSet(); 98 if (m != Normalizer.NONE || lowerFirst) { 99 UnicodeSetClosure.close(sourceSet, m, lowerFirst); 100 } 101 System.out.println(t.getID() + ": " + 102 sourceSet.toPattern(true)); 103 out.println("# MINIMAL FILTER GENERATED FOR: " + t.getID() + (forward ? "" : " REVERSE")); 104 out.println(":: " 105 + (forward ? "" : "( ") 106 + sourceSet.toPattern(true) 107 + (forward ? "" : " )") 108 + " ;"); 109 out.println("# Unicode: " + sourceSet.toPattern(false)); 110 out.println(); 111 } 112 113 static void usage() { 114 System.err.println("Usage: ID [ NFD|NFKD [lower] ]"); 115 System.exit(1); 116 } 117 } 118