1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2001-2010, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.tool.translit; 10 11 import java.io.BufferedReader; 12 import java.io.FileInputStream; 13 import java.io.FileOutputStream; 14 import java.io.IOException; 15 import java.io.InputStreamReader; 16 import java.io.OutputStreamWriter; 17 import java.io.PrintWriter; 18 19 import com.ibm.icu.text.Transliterator; 20 21 /** 22 * A command-line interface to the ICU4J transliterators. 23 * @author Alan Liu 24 */ 25 public class Trans { 26 27 public static void main(String[] args) throws Exception { 28 boolean isHTML = false; 29 int pos = 0; 30 31 String transName = null; // first untagged string is this 32 String inText = null; // all other untagged strings are this 33 String inName = null; 34 String outName = null; 35 36 while (pos < args.length) { 37 if (args[pos].equals("-html")) { 38 isHTML = true; 39 } else if (args[pos].equals("-i")) { 40 if (++pos == args.length) usage(); 41 inName = args[pos]; 42 } else if (args[pos].equals("-o")) { 43 if (++pos == args.length) usage(); 44 outName = args[pos]; 45 } else if (transName == null) { 46 transName = args[pos]; 47 } else { 48 if (inText == null) { 49 inText = args[pos]; 50 } else { 51 inText = inText + " " + args[pos]; 52 } 53 } 54 ++pos; 55 } 56 57 if (inText != null && inName != null) { 58 usage(); 59 } 60 61 Transliterator trans = Transliterator.getInstance(transName); 62 BufferedReader in = null; 63 if (inName != null) { 64 in = new BufferedReader(new InputStreamReader(new FileInputStream(inName), "UTF8")); 65 } 66 PrintWriter out = null; 67 if (outName != null) { 68 out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(outName), "UTF8")); 69 } else { 70 out = new PrintWriter(System.out); 71 } 72 trans(trans, inText, in, out, isHTML); 73 out.close(); 74 } 75 76 static void trans(Transliterator trans, String inText, 77 BufferedReader in, PrintWriter out, boolean isHTML) throws IOException { 78 boolean inTag = false; // If true, we are within a <tag> 79 for (;;) { 80 String line = null; 81 if (inText != null) { 82 line = inText; 83 inText = null; 84 } else if (in != null) { 85 line = in.readLine(); 86 } 87 if (line == null) { 88 break; 89 } 90 if (isHTML) { 91 // Pass tags between < and > unchanged 92 StringBuffer buf = new StringBuffer(); 93 int right = -1; 94 if (inTag) { 95 right = line.indexOf('>'); 96 if (right < 0) { 97 right = line.length()-1; 98 } 99 buf.append(line.substring(0, right+1)); 100 if (DEBUG) System.out.println("*S:" + line.substring(0, right+1)); 101 inTag = false; 102 } 103 for (;;) { 104 int left = line.indexOf('<', right+1); 105 if (left < 0) { 106 if (right < line.length()-1) { 107 buf.append(trans.transliterate(line.substring(right+1))); 108 if (DEBUG) System.out.println("T:" + line.substring(right+1)); 109 } 110 break; 111 } 112 // Append transliterated segment right+1..left-1 113 buf.append(trans.transliterate(line.substring(right+1, left))); 114 if (DEBUG) System.out.println("T:" + line.substring(right+1, left)); 115 right = line.indexOf('>', left+1); 116 if (right < 0) { 117 inTag = true; 118 buf.append(line.substring(left)); 119 if (DEBUG) System.out.println("S:" + line.substring(left)); 120 break; 121 } 122 buf.append(line.substring(left, right+1)); 123 if (DEBUG) System.out.println("S:" + line.substring(left, right+1)); 124 } 125 line = buf.toString(); 126 } else { 127 line = trans.transliterate(line); 128 } 129 out.println(line); 130 } 131 } 132 133 static final boolean DEBUG = false; 134 135 /** 136 * Emit usage and die. 137 */ 138 static void usage() { 139 System.out.println("Usage: java com.ibm.icu.dev.tool.translit.Trans [-html] <trans> ( <input> | -i <infile>) [ -o <outfile> ]"); 140 System.out.println("<trans> Name of transliterator"); 141 System.out.println("<input> Text to transliterate"); 142 System.out.println("<infile> Name of input file"); 143 System.out.println("<outfile> Name of output file"); 144 System.out.println("-html Only transliterate text outside of <tags>"); 145 System.out.println("Input may come from the command line or a file.\n"); 146 System.out.println("Ouput may go to stdout or a file.\n"); 147 System.exit(0); 148 } 149 } 150