Home | History | Annotate | Download | only in translit
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4 **********************************************************************
      5 *   Copyright (c) 2001-2010, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   11/29/2001  aliu        Creation.
     10 *   06/26/2002  aliu        Moved to com.ibm.icu.dev.tool.translit
     11 **********************************************************************
     12 */
     13 package com.ibm.icu.dev.tool.translit;
     14 import java.io.File;
     15 import java.io.FileOutputStream;
     16 import java.io.IOException;
     17 import java.io.OutputStreamWriter;
     18 import java.io.PrintWriter;
     19 import java.util.Enumeration;
     20 
     21 import com.ibm.icu.text.Normalizer;
     22 import com.ibm.icu.text.Transliterator;
     23 import com.ibm.icu.text.UnicodeSet;
     24 
     25 /**
     26  * Class that generates source set information for a transliterator.
     27  *
     28  * To run, use:
     29  *
     30  *   java com.ibm.icu.dev.tool.translit.SourceSet Latin-Katakana NFD lower
     31  *
     32  * Output is produced in the command console, and a file with more detail is also written.
     33  *
     34  * To see if it works, use:
     35  *
     36  *   java com.ibm.icu.dev.test.translit.TransliteratorTest -v -nothrow TestIncrementalProgress
     37  *
     38  * and
     39  *
     40  *   java com.ibm.icu.dev.demo.translit.Demo
     41  */
     42 public class SourceSet {
     43 
     44     public static void main(String[] args) throws IOException {
     45         if (args.length == 0) {
     46             // Compute and display the source sets for all system
     47             // transliterators.
     48             for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
     49                 String ID = (String) e.nextElement();
     50                 showSourceSet(ID, Normalizer.NONE, false);
     51             }
     52         } else {
     53             // Usage: ID [NFKD | NFD] [lower]
     54             Normalizer.Mode m = Normalizer.NONE;
     55             boolean lowerFirst = false;
     56             if (args.length >= 2) {
     57                 if (args[1].equalsIgnoreCase("NFD")) {
     58                     m = Normalizer.NFD;
     59                 } else if (args[1].equalsIgnoreCase("NFKD")) {
     60                     m = Normalizer.NFKD;
     61                 } else {
     62                     usage();
     63                 }
     64             }
     65             if (args.length >= 3) {
     66                 if (args[2].equalsIgnoreCase("lower")) {
     67                     lowerFirst = true;
     68                 } else {
     69                     usage();
     70                 }
     71             }
     72             if (args.length > 3) {
     73                 usage();
     74             }
     75             showSourceSet(args[0], m, lowerFirst);
     76         }
     77     }
     78 
     79     static void showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst) throws IOException {
     80         File f = new File("UnicodeSetClosure.txt");
     81         String filename = f.getCanonicalFile().toString();
     82         out = new PrintWriter(
     83             new OutputStreamWriter(
     84                 new FileOutputStream(filename), "UTF-8"));
     85         out.print('\uFEFF'); // BOM
     86         System.out.println();
     87         System.out.println("Writing " + filename);
     88         Transliterator t = Transliterator.getInstance(ID);
     89         showSourceSetAux(t, m, lowerFirst, true);
     90         showSourceSetAux(t.getInverse(), m, lowerFirst, false);
     91         out.close();
     92     }
     93 
     94     static PrintWriter out;
     95 
     96     static void showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward) {
     97         UnicodeSet sourceSet = t.getSourceSet();
     98         if (m != Normalizer.NONE || lowerFirst) {
     99             UnicodeSetClosure.close(sourceSet, m, lowerFirst);
    100         }
    101         System.out.println(t.getID() + ": " +
    102                            sourceSet.toPattern(true));
    103         out.println("# MINIMAL FILTER GENERATED FOR: " + t.getID() + (forward ? "" : " REVERSE"));
    104         out.println(":: "
    105             + (forward ? "" : "( ")
    106             + sourceSet.toPattern(true)
    107             + (forward ? "" : " )")
    108             + " ;");
    109         out.println("# Unicode: " + sourceSet.toPattern(false));
    110         out.println();
    111     }
    112 
    113     static void usage() {
    114         System.err.println("Usage: ID [ NFD|NFKD [lower] ]");
    115         System.exit(1);
    116     }
    117 }
    118