Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import java.io.File;
     20 import java.io.FileInputStream;
     21 import java.io.FileNotFoundException;
     22 import java.io.FileOutputStream;
     23 import java.io.FileWriter;
     24 import java.io.IOException;
     25 import java.io.RandomAccessFile;
     26 import java.util.Arrays;
     27 import java.util.LinkedList;
     28 
     29 import javax.xml.parsers.ParserConfigurationException;
     30 
     31 import org.xml.sax.SAXException;
     32 
     33 /**
     34  * Main class/method for DictionaryMaker.
     35  */
     36 public class DictionaryMaker {
     37 
     38     static class Arguments {
     39         private final static String OPTION_VERSION_2 = "-2";
     40         private final static String OPTION_INPUT_SOURCE = "-s";
     41         private final static String OPTION_INPUT_BIGRAM_XML = "-b";
     42         private final static String OPTION_OUTPUT_BINARY = "-d";
     43         private final static String OPTION_OUTPUT_XML = "-x";
     44         private final static String OPTION_HELP = "-h";
     45         public final String mInputBinary;
     46         public final String mInputUnigramXml;
     47         public final String mInputBigramXml;
     48         public final String mOutputBinary;
     49         public final String mOutputXml;
     50 
     51         private void checkIntegrity() {
     52             checkHasExactlyOneInput();
     53             checkHasAtLeastOneOutput();
     54         }
     55 
     56         private void checkHasExactlyOneInput() {
     57             if (null == mInputUnigramXml && null == mInputBinary) {
     58                 throw new RuntimeException("No input file specified");
     59             } else if (null != mInputUnigramXml && null != mInputBinary) {
     60                 throw new RuntimeException("Both input XML and binary specified");
     61             } else if (null != mInputBinary && null != mInputBigramXml) {
     62                 throw new RuntimeException("Cannot specify a binary input and a separate bigram "
     63                         + "file");
     64             }
     65         }
     66 
     67         private void checkHasAtLeastOneOutput() {
     68             if (null == mOutputBinary && null == mOutputXml) {
     69                 throw new RuntimeException("No output specified");
     70             }
     71         }
     72 
     73         private void displayHelp() {
     74             MakedictLog.i("Usage: makedict "
     75                     + "[-s <unigrams.xml> [-b <bigrams.xml>] | -s <binary input>] "
     76                     + " [-d <binary output>] [-x <xml output>] [-2]\n"
     77                     + "\n"
     78                     + "  Converts a source dictionary file to one or several outputs.\n"
     79                     + "  Source can be an XML file, with an optional XML bigrams file, or a\n"
     80                     + "  binary dictionary file.\n"
     81                     + "  Both binary and XML outputs are supported. Both can be output at\n"
     82                     + "  the same time but outputting several files of the same type is not\n"
     83                     + "  supported.");
     84         }
     85 
     86         public Arguments(String[] argsArray) {
     87             final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
     88             if (args.isEmpty()) {
     89                 displayHelp();
     90             }
     91             String inputBinary = null;
     92             String inputUnigramXml = null;
     93             String inputBigramXml = null;
     94             String outputBinary = null;
     95             String outputXml = null;
     96 
     97             while (!args.isEmpty()) {
     98                 final String arg = args.get(0);
     99                 args.remove(0);
    100                 if (arg.charAt(0) == '-') {
    101                     if (OPTION_VERSION_2.equals(arg)) {
    102                         // Do nothing, this is the default
    103                     } else if (OPTION_HELP.equals(arg)) {
    104                         displayHelp();
    105                     } else {
    106                         // All these options need an argument
    107                         if (args.isEmpty()) {
    108                             throw new RuntimeException("Option " + arg + " requires an argument");
    109                         }
    110                         String filename = args.get(0);
    111                         args.remove(0);
    112                         if (OPTION_INPUT_SOURCE.equals(arg)) {
    113                             if (BinaryDictInputOutput.isBinaryDictionary(filename)) {
    114                                 inputBinary = filename;
    115                             } else {
    116                                 inputUnigramXml = filename;
    117                             }
    118                         } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
    119                             inputBigramXml = filename;
    120                         } else if (OPTION_OUTPUT_BINARY.equals(arg)) {
    121                             outputBinary = filename;
    122                         } else if (OPTION_OUTPUT_XML.equals(arg)) {
    123                             outputXml = filename;
    124                         }
    125                     }
    126                 } else {
    127                     if (null == inputBinary && null == inputUnigramXml) {
    128                         if (BinaryDictInputOutput.isBinaryDictionary(arg)) {
    129                             inputBinary = arg;
    130                         } else {
    131                             inputUnigramXml = arg;
    132                         }
    133                     } else if (null == outputBinary) {
    134                         outputBinary = arg;
    135                     } else {
    136                         throw new RuntimeException("Several output binary files specified");
    137                     }
    138                 }
    139             }
    140 
    141             mInputBinary = inputBinary;
    142             mInputUnigramXml = inputUnigramXml;
    143             mInputBigramXml = inputBigramXml;
    144             mOutputBinary = outputBinary;
    145             mOutputXml = outputXml;
    146             checkIntegrity();
    147         }
    148     }
    149 
    150     public static void main(String[] args)
    151             throws FileNotFoundException, ParserConfigurationException, SAXException, IOException,
    152             UnsupportedFormatException {
    153         final Arguments parsedArgs = new Arguments(args);
    154         FusionDictionary dictionary = readInputFromParsedArgs(parsedArgs);
    155         writeOutputToParsedArgs(parsedArgs, dictionary);
    156     }
    157 
    158     /**
    159      * Invoke the right input method according to args.
    160      *
    161      * @param args the parsed command line arguments.
    162      * @return the read dictionary.
    163      */
    164     private static FusionDictionary readInputFromParsedArgs(final Arguments args)
    165             throws IOException, UnsupportedFormatException, ParserConfigurationException,
    166             SAXException, FileNotFoundException {
    167         if (null != args.mInputBinary) {
    168             return readBinaryFile(args.mInputBinary);
    169         } else if (null != args.mInputUnigramXml) {
    170             return readXmlFile(args.mInputUnigramXml, args.mInputBigramXml);
    171         } else {
    172             throw new RuntimeException("No input file specified");
    173         }
    174     }
    175 
    176     /**
    177      * Read a dictionary from the name of a binary file.
    178      *
    179      * @param binaryFilename the name of the file in the binary dictionary format.
    180      * @return the read dictionary.
    181      * @throws FileNotFoundException if the file can't be found
    182      * @throws IOException if the input file can't be read
    183      * @throws UnsupportedFormatException if the binary file is not in the expected format
    184      */
    185     private static FusionDictionary readBinaryFile(final String binaryFilename)
    186             throws FileNotFoundException, IOException, UnsupportedFormatException {
    187         final RandomAccessFile inputFile = new RandomAccessFile(binaryFilename, "r");
    188         return BinaryDictInputOutput.readDictionaryBinary(inputFile, null);
    189     }
    190 
    191     /**
    192      * Read a dictionary from a unigram XML file, and optionally a bigram XML file.
    193      *
    194      * @param unigramXmlFilename the name of the unigram XML file. May not be null.
    195      * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
    196      * @return the read dictionary.
    197      * @throws FileNotFoundException if one of the files can't be found
    198      * @throws SAXException if one or more of the XML files is not well-formed
    199      * @throws IOException if one the input files can't be read
    200      * @throws ParserConfigurationException if the system can't create a SAX parser
    201      */
    202     private static FusionDictionary readXmlFile(final String unigramXmlFilename,
    203             final String bigramXmlFilename) throws FileNotFoundException, SAXException,
    204             IOException, ParserConfigurationException {
    205         final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename));
    206         final FileInputStream bigrams = null == bigramXmlFilename ? null :
    207                 new FileInputStream(new File(bigramXmlFilename));
    208         return XmlDictInputOutput.readDictionaryXml(unigrams, bigrams);
    209     }
    210 
    211     /**
    212      * Invoke the right output method according to args.
    213      *
    214      * This will write the passed dictionary to the file(s) passed in the command line arguments.
    215      * @param args the parsed arguments.
    216      * @param dict the file to output.
    217      * @throws FileNotFoundException if one of the output files can't be created.
    218      * @throws IOException if one of the output files can't be written to.
    219      */
    220     private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
    221             throws FileNotFoundException, IOException {
    222         if (null != args.mOutputBinary) {
    223             writeBinaryDictionary(args.mOutputBinary, dict);
    224         }
    225         if (null != args.mOutputXml) {
    226             writeXmlDictionary(args.mOutputXml, dict);
    227         }
    228     }
    229 
    230     /**
    231      * Write the dictionary in binary format to the specified filename.
    232      *
    233      * @param outputFilename the name of the file to write to.
    234      * @param dict the dictionary to write.
    235      * @throws FileNotFoundException if the output file can't be created.
    236      * @throws IOException if the output file can't be written to.
    237      */
    238     private static void writeBinaryDictionary(final String outputFilename,
    239             final FusionDictionary dict) throws FileNotFoundException, IOException {
    240         final File outputFile = new File(outputFilename);
    241         BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict);
    242     }
    243 
    244     /**
    245      * Write the dictionary in XML format to the specified filename.
    246      *
    247      * @param outputFilename the name of the file to write to.
    248      * @param dict the dictionary to write.
    249      * @throws FileNotFoundException if the output file can't be created.
    250      * @throws IOException if the output file can't be written to.
    251      */
    252     private static void writeXmlDictionary(final String outputFilename,
    253             final FusionDictionary dict) throws FileNotFoundException, IOException {
    254         XmlDictInputOutput.writeDictionaryXml(new FileWriter(outputFilename), dict);
    255     }
    256 }
    257