1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 /** 3 ******************************************************************************* 4 * Copyright (C) 2005-2016, International Business Machines Corporation and * 5 * others. All Rights Reserved. * 6 ******************************************************************************* 7 */ 8 package android.icu.text; 9 10 import java.io.ByteArrayInputStream; 11 import java.io.IOException; 12 import java.io.InputStream; 13 import java.io.InputStreamReader; 14 import java.io.Reader; 15 16 17 /** 18 * This class represents a charset that has been identified by a CharsetDetector 19 * as a possible encoding for a set of input data. From an instance of this 20 * class, you can ask for a confidence level in the charset identification, 21 * or for Java Reader or String to access the original byte data in Unicode form. 22 * <p> 23 * Instances of this class are created only by CharsetDetectors. 24 * <p> 25 * Note: this class has a natural ordering that is inconsistent with equals. 26 * The natural ordering is based on the match confidence value. 27 * 28 * @hide Only a subset of ICU is exposed in Android 29 */ 30 public class CharsetMatch implements Comparable<CharsetMatch> { 31 32 33 /** 34 * Create a java.io.Reader for reading the Unicode character data corresponding 35 * to the original byte data supplied to the Charset detect operation. 36 * <p> 37 * CAUTION: if the source of the byte data was an InputStream, a Reader 38 * can be created for only one matching char set using this method. If more 39 * than one charset needs to be tried, the caller will need to reset 40 * the InputStream and create InputStreamReaders itself, based on the charset name. 41 * 42 * @return the Reader for the Unicode character data. 43 */ 44 public Reader getReader() { 45 InputStream inputStream = fInputStream; 46 47 if (inputStream == null) { 48 inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength); 49 } 50 51 try { 52 inputStream.reset(); 53 return new InputStreamReader(inputStream, getName()); 54 } catch (IOException e) { 55 return null; 56 } 57 } 58 59 /** 60 * Create a Java String from Unicode character data corresponding 61 * to the original byte data supplied to the Charset detect operation. 62 * 63 * @return a String created from the converted input data. 64 */ 65 public String getString() throws java.io.IOException { 66 return getString(-1); 67 68 } 69 70 /** 71 * Create a Java String from Unicode character data corresponding 72 * to the original byte data supplied to the Charset detect operation. 73 * The length of the returned string is limited to the specified size; 74 * the string will be trunctated to this length if necessary. A limit value of 75 * zero or less is ignored, and treated as no limit. 76 * 77 * @param maxLength The maximium length of the String to be created when the 78 * source of the data is an input stream, or -1 for 79 * unlimited length. 80 * @return a String created from the converted input data. 81 */ 82 public String getString(int maxLength) throws java.io.IOException { 83 String result = null; 84 if (fInputStream != null) { 85 StringBuilder sb = new StringBuilder(); 86 char[] buffer = new char[1024]; 87 Reader reader = getReader(); 88 int max = maxLength < 0? Integer.MAX_VALUE : maxLength; 89 int bytesRead = 0; 90 91 while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) >= 0) { 92 sb.append(buffer, 0, bytesRead); 93 max -= bytesRead; 94 } 95 96 reader.close(); 97 98 return sb.toString(); 99 } else { 100 String name = getName(); 101 /* 102 * getName() may return a name with a suffix 'rtl' or 'ltr'. This cannot 103 * be used to open a charset (e.g. IBM424_rtl). The ending '_rtl' or 'ltr' 104 * should be stripped off before creating the string. 105 */ 106 int startSuffix = name.indexOf("_rtl") < 0 ? name.indexOf("_ltr") : name.indexOf("_rtl"); 107 if (startSuffix > 0) { 108 name = name.substring(0, startSuffix); 109 } 110 result = new String(fRawInput, name); 111 } 112 return result; 113 114 } 115 116 /** 117 * Get an indication of the confidence in the charset detected. 118 * Confidence values range from 0-100, with larger numbers indicating 119 * a better match of the input data to the characteristics of the 120 * charset. 121 * 122 * @return the confidence in the charset match 123 */ 124 public int getConfidence() { 125 return fConfidence; 126 } 127 128 /** 129 * Get the name of the detected charset. 130 * The name will be one that can be used with other APIs on the 131 * platform that accept charset names. It is the "Canonical name" 132 * as defined by the class java.nio.charset.Charset; for 133 * charsets that are registered with the IANA charset registry, 134 * this is the MIME-preferred registerd name. 135 * 136 * @see java.nio.charset.Charset 137 * @see java.io.InputStreamReader 138 * 139 * @return The name of the charset. 140 */ 141 public String getName() { 142 return fCharsetName; 143 } 144 145 /** 146 * Get the ISO code for the language of the detected charset. 147 * 148 * @return The ISO code for the language or <code>null</code> if the language cannot be determined. 149 */ 150 public String getLanguage() { 151 return fLang; 152 } 153 154 /** 155 * Compare to other CharsetMatch objects. 156 * Comparison is based on the match confidence value, which 157 * allows CharsetDetector.detectAll() to order its results. 158 * 159 * @param other the CharsetMatch object to compare against. 160 * @return a negative integer, zero, or a positive integer as the 161 * confidence level of this CharsetMatch 162 * is less than, equal to, or greater than that of 163 * the argument. 164 * @throws ClassCastException if the argument is not a CharsetMatch. 165 */ 166 public int compareTo (CharsetMatch other) { 167 int compareResult = 0; 168 if (this.fConfidence > other.fConfidence) { 169 compareResult = 1; 170 } else if (this.fConfidence < other.fConfidence) { 171 compareResult = -1; 172 } 173 return compareResult; 174 } 175 176 /* 177 * Constructor. Implementation internal 178 */ 179 CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) { 180 fConfidence = conf; 181 182 // The references to the original application input data must be copied out 183 // of the charset recognizer to here, in case the application resets the 184 // recognizer before using this CharsetMatch. 185 if (det.fInputStream == null) { 186 // We only want the existing input byte data if it came straight from the user, 187 // not if is just the head of a stream. 188 fRawInput = det.fRawInput; 189 fRawLength = det.fRawLength; 190 } 191 fInputStream = det.fInputStream; 192 fCharsetName = rec.getName(); 193 fLang = rec.getLanguage(); 194 } 195 196 /* 197 * Constructor. Implementation internal 198 */ 199 CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) { 200 fConfidence = conf; 201 202 // The references to the original application input data must be copied out 203 // of the charset recognizer to here, in case the application resets the 204 // recognizer before using this CharsetMatch. 205 if (det.fInputStream == null) { 206 // We only want the existing input byte data if it came straight from the user, 207 // not if is just the head of a stream. 208 fRawInput = det.fRawInput; 209 fRawLength = det.fRawLength; 210 } 211 fInputStream = det.fInputStream; 212 fCharsetName = csName; 213 fLang = lang; 214 } 215 216 217 // 218 // Private Data 219 // 220 private int fConfidence; 221 private byte[] fRawInput = null; // Original, untouched input bytes. 222 // If user gave us a byte array, this is it. 223 private int fRawLength; // Length of data in fRawInput array. 224 225 private InputStream fInputStream = null; // User's input stream, or null if the user 226 // gave us a byte array. 227 228 private String fCharsetName; // The name of the charset this CharsetMatch 229 // represents. Filled in by the recognizer. 230 private String fLang; // The language, if one was determined by 231 // the recognizer during the detect operation. 232 } 233