Home | History | Annotate | Download | only in translit
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /**
      4  *******************************************************************************
      5  * Copyright (C) 2001-2010, International Business Machines Corporation and    *
      6  * others. All Rights Reserved.                                                *
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.dev.tool.translit;
     10 
     11 import java.io.BufferedReader;
     12 import java.io.FileInputStream;
     13 import java.io.FileOutputStream;
     14 import java.io.IOException;
     15 import java.io.InputStreamReader;
     16 import java.io.OutputStreamWriter;
     17 import java.io.PrintWriter;
     18 
     19 import com.ibm.icu.text.Transliterator;
     20 
     21 /**
     22  * A command-line interface to the ICU4J transliterators.
     23  * @author Alan Liu
     24  */
     25 public class Trans {
     26 
     27     public static void main(String[] args) throws Exception {
     28         boolean isHTML = false;
     29         int pos = 0;
     30 
     31         String transName = null; // first untagged string is this
     32         String inText = null; // all other untagged strings are this
     33         String inName = null;
     34         String outName = null;
     35 
     36         while (pos < args.length) {
     37             if (args[pos].equals("-html")) {
     38                 isHTML = true;
     39             } else if (args[pos].equals("-i")) {
     40                 if (++pos == args.length) usage();
     41                 inName = args[pos];
     42             } else if (args[pos].equals("-o")) {
     43                 if (++pos == args.length) usage();
     44                 outName = args[pos];
     45             } else if (transName == null) {
     46                 transName = args[pos];
     47             } else {
     48                 if (inText == null) {
     49                     inText = args[pos];
     50                 } else {
     51                     inText = inText + " " + args[pos];
     52                 }
     53             }
     54             ++pos;
     55         }
     56 
     57         if (inText != null && inName != null) {
     58             usage();
     59         }
     60 
     61         Transliterator trans = Transliterator.getInstance(transName);
     62         BufferedReader in = null;
     63         if (inName != null) {
     64             in = new BufferedReader(new InputStreamReader(new FileInputStream(inName), "UTF8"));
     65         }
     66         PrintWriter out = null;
     67         if (outName != null) {
     68             out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(outName), "UTF8"));
     69         } else {
     70             out = new PrintWriter(System.out);
     71         }
     72         trans(trans, inText, in, out, isHTML);
     73         out.close();
     74     }
     75 
     76     static void trans(Transliterator trans, String inText,
     77                       BufferedReader in, PrintWriter out, boolean isHTML) throws IOException {
     78         boolean inTag = false; // If true, we are within a <tag>
     79         for (;;) {
     80             String line = null;
     81             if (inText != null) {
     82                 line = inText;
     83                 inText = null;
     84             } else if (in != null) {
     85                 line = in.readLine();
     86             }
     87             if (line == null) {
     88                 break;
     89             }
     90             if (isHTML) {
     91                 // Pass tags between < and > unchanged
     92                 StringBuffer buf = new StringBuffer();
     93                 int right = -1;
     94                 if (inTag) {
     95                     right = line.indexOf('>');
     96                     if (right < 0) {
     97                         right = line.length()-1;
     98                     }
     99                     buf.append(line.substring(0, right+1));
    100                     if (DEBUG) System.out.println("*S:" + line.substring(0, right+1));
    101                     inTag = false;
    102                 }
    103                 for (;;) {
    104                     int left = line.indexOf('<', right+1);
    105                     if (left < 0) {
    106                         if (right < line.length()-1) {
    107                             buf.append(trans.transliterate(line.substring(right+1)));
    108                             if (DEBUG) System.out.println("T:" + line.substring(right+1));
    109                         }
    110                         break;
    111                     }
    112                     // Append transliterated segment right+1..left-1
    113                     buf.append(trans.transliterate(line.substring(right+1, left)));
    114                     if (DEBUG) System.out.println("T:" + line.substring(right+1, left));
    115                     right = line.indexOf('>', left+1);
    116                     if (right < 0) {
    117                         inTag = true;
    118                         buf.append(line.substring(left));
    119                         if (DEBUG) System.out.println("S:" + line.substring(left));
    120                         break;
    121                     }
    122                     buf.append(line.substring(left, right+1));
    123                     if (DEBUG) System.out.println("S:" + line.substring(left, right+1));
    124                 }
    125                 line = buf.toString();
    126             } else {
    127                 line = trans.transliterate(line);
    128             }
    129             out.println(line);
    130         }
    131     }
    132 
    133     static final boolean DEBUG = false;
    134 
    135     /**
    136      * Emit usage and die.
    137      */
    138     static void usage() {
    139         System.out.println("Usage: java com.ibm.icu.dev.tool.translit.Trans [-html] <trans> ( <input> | -i <infile>) [ -o <outfile> ]");
    140         System.out.println("<trans>   Name of transliterator");
    141         System.out.println("<input>   Text to transliterate");
    142         System.out.println("<infile>  Name of input file");
    143         System.out.println("<outfile> Name of output file");
    144         System.out.println("-html     Only transliterate text outside of <tags>");
    145         System.out.println("Input may come from the command line or a file.\n");
    146         System.out.println("Ouput may go to stdout or a file.\n");
    147         System.exit(0);
    148     }
    149 }
    150