Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 /**
      3 *******************************************************************************
      4 * Copyright (C) 2005-2016, International Business Machines Corporation and    *
      5 * others. All Rights Reserved.                                                *
      6 *******************************************************************************
      7 */
      8 package android.icu.text;
      9 
     10 import java.io.ByteArrayInputStream;
     11 import java.io.IOException;
     12 import java.io.InputStream;
     13 import java.io.InputStreamReader;
     14 import java.io.Reader;
     15 
     16 
     17 /**
     18  * This class represents a charset that has been identified by a CharsetDetector
     19  * as a possible encoding for a set of input data.  From an instance of this
     20  * class, you can ask for a confidence level in the charset identification,
     21  * or for Java Reader or String to access the original byte data in Unicode form.
     22  * <p>
     23  * Instances of this class are created only by CharsetDetectors.
     24  * <p>
     25  * Note:  this class has a natural ordering that is inconsistent with equals.
     26  *        The natural ordering is based on the match confidence value.
     27  *
     28  * @hide Only a subset of ICU is exposed in Android
     29  */
     30 public class CharsetMatch implements Comparable<CharsetMatch> {
     31 
     32 
     33     /**
     34      * Create a java.io.Reader for reading the Unicode character data corresponding
     35      * to the original byte data supplied to the Charset detect operation.
     36      * <p>
     37      * CAUTION:  if the source of the byte data was an InputStream, a Reader
     38      * can be created for only one matching char set using this method.  If more
     39      * than one charset needs to be tried, the caller will need to reset
     40      * the InputStream and create InputStreamReaders itself, based on the charset name.
     41      *
     42      * @return the Reader for the Unicode character data.
     43      */
     44     public Reader getReader() {
     45         InputStream inputStream = fInputStream;
     46 
     47         if (inputStream == null) {
     48             inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength);
     49         }
     50 
     51         try {
     52             inputStream.reset();
     53             return new InputStreamReader(inputStream, getName());
     54         } catch (IOException e) {
     55             return null;
     56         }
     57     }
     58 
     59     /**
     60      * Create a Java String from Unicode character data corresponding
     61      * to the original byte data supplied to the Charset detect operation.
     62      *
     63      * @return a String created from the converted input data.
     64      */
     65     public String getString()  throws java.io.IOException {
     66         return getString(-1);
     67 
     68     }
     69 
     70     /**
     71      * Create a Java String from Unicode character data corresponding
     72      * to the original byte data supplied to the Charset detect operation.
     73      * The length of the returned string is limited to the specified size;
     74      * the string will be trunctated to this length if necessary.  A limit value of
     75      * zero or less is ignored, and treated as no limit.
     76      *
     77      * @param maxLength The maximium length of the String to be created when the
     78      *                  source of the data is an input stream, or -1 for
     79      *                  unlimited length.
     80      * @return a String created from the converted input data.
     81      */
     82     public String getString(int maxLength) throws java.io.IOException {
     83         String result = null;
     84         if (fInputStream != null) {
     85             StringBuilder sb = new StringBuilder();
     86             char[] buffer = new char[1024];
     87             Reader reader = getReader();
     88             int max = maxLength < 0? Integer.MAX_VALUE : maxLength;
     89             int bytesRead = 0;
     90 
     91             while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) >= 0) {
     92                 sb.append(buffer, 0, bytesRead);
     93                 max -= bytesRead;
     94             }
     95 
     96             reader.close();
     97 
     98             return sb.toString();
     99         } else {
    100             String name = getName();
    101             /*
    102              * getName() may return a name with a suffix 'rtl' or 'ltr'. This cannot
    103              * be used to open a charset (e.g. IBM424_rtl). The ending '_rtl' or 'ltr'
    104              * should be stripped off before creating the string.
    105              */
    106             int startSuffix = name.indexOf("_rtl") < 0 ? name.indexOf("_ltr") : name.indexOf("_rtl");
    107             if (startSuffix > 0) {
    108                 name = name.substring(0, startSuffix);
    109             }
    110             result = new String(fRawInput, name);
    111         }
    112         return result;
    113 
    114     }
    115 
    116     /**
    117      * Get an indication of the confidence in the charset detected.
    118      * Confidence values range from 0-100, with larger numbers indicating
    119      * a better match of the input data to the characteristics of the
    120      * charset.
    121      *
    122      * @return the confidence in the charset match
    123      */
    124     public int getConfidence() {
    125         return fConfidence;
    126     }
    127 
    128     /**
    129      * Get the name of the detected charset.
    130      * The name will be one that can be used with other APIs on the
    131      * platform that accept charset names.  It is the "Canonical name"
    132      * as defined by the class java.nio.charset.Charset; for
    133      * charsets that are registered with the IANA charset registry,
    134      * this is the MIME-preferred registerd name.
    135      *
    136      * @see java.nio.charset.Charset
    137      * @see java.io.InputStreamReader
    138      *
    139      * @return The name of the charset.
    140      */
    141     public String getName() {
    142         return fCharsetName;
    143     }
    144 
    145     /**
    146      * Get the ISO code for the language of the detected charset.
    147      *
    148      * @return The ISO code for the language or <code>null</code> if the language cannot be determined.
    149      */
    150     public String getLanguage() {
    151         return fLang;
    152     }
    153 
    154     /**
    155      * Compare to other CharsetMatch objects.
    156      * Comparison is based on the match confidence value, which
    157      *   allows CharsetDetector.detectAll() to order its results.
    158      *
    159      * @param other the CharsetMatch object to compare against.
    160      * @return  a negative integer, zero, or a positive integer as the
    161      *          confidence level of this CharsetMatch
    162      *          is less than, equal to, or greater than that of
    163      *          the argument.
    164      * @throws ClassCastException if the argument is not a CharsetMatch.
    165      */
    166     public int compareTo (CharsetMatch other) {
    167         int compareResult = 0;
    168         if (this.fConfidence > other.fConfidence) {
    169             compareResult = 1;
    170         } else if (this.fConfidence < other.fConfidence) {
    171             compareResult = -1;
    172         }
    173         return compareResult;
    174     }
    175 
    176     /*
    177      *  Constructor.  Implementation internal
    178      */
    179     CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) {
    180         fConfidence = conf;
    181 
    182         // The references to the original application input data must be copied out
    183         //   of the charset recognizer to here, in case the application resets the
    184         //   recognizer before using this CharsetMatch.
    185         if (det.fInputStream == null) {
    186             // We only want the existing input byte data if it came straight from the user,
    187             //   not if is just the head of a stream.
    188             fRawInput    = det.fRawInput;
    189             fRawLength   = det.fRawLength;
    190         }
    191         fInputStream = det.fInputStream;
    192         fCharsetName = rec.getName();
    193         fLang = rec.getLanguage();
    194     }
    195 
    196     /*
    197      *  Constructor.  Implementation internal
    198      */
    199     CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) {
    200         fConfidence = conf;
    201 
    202         // The references to the original application input data must be copied out
    203         //   of the charset recognizer to here, in case the application resets the
    204         //   recognizer before using this CharsetMatch.
    205         if (det.fInputStream == null) {
    206             // We only want the existing input byte data if it came straight from the user,
    207             //   not if is just the head of a stream.
    208             fRawInput    = det.fRawInput;
    209             fRawLength   = det.fRawLength;
    210         }
    211         fInputStream = det.fInputStream;
    212         fCharsetName = csName;
    213         fLang = lang;
    214     }
    215 
    216 
    217     //
    218     //   Private Data
    219     //
    220     private int                 fConfidence;
    221     private byte[]              fRawInput = null;     // Original, untouched input bytes.
    222                                                       //  If user gave us a byte array, this is it.
    223     private int                 fRawLength;           // Length of data in fRawInput array.
    224 
    225     private InputStream         fInputStream = null;  // User's input stream, or null if the user
    226                                                       //   gave us a byte array.
    227 
    228     private String              fCharsetName;         // The name of the charset this CharsetMatch
    229                                                       //   represents.  Filled in by the recognizer.
    230     private String              fLang;                // The language, if one was determined by
    231                                                       //   the recognizer during the detect operation.
    232 }
    233