1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2006-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.charset; 11 12 import java.io.IOException; 13 import java.nio.charset.Charset; 14 import java.nio.charset.UnsupportedCharsetException; 15 import java.nio.charset.spi.CharsetProvider; 16 import java.util.Collections; 17 import java.util.Iterator; 18 import java.util.LinkedList; 19 import java.util.List; 20 21 import com.ibm.icu.impl.InvalidFormatException; 22 23 24 /** 25 * A concrete subclass of CharsetProvider for loading and providing charset converters 26 * in ICU. 27 * @stable ICU 3.6 28 */ 29 public final class CharsetProviderICU extends CharsetProvider{ 30 /** 31 * List of available ICU Charsets, empty during static initialization. 32 * Not a Set or Map, so that we can add different Charset objects with the same name(), 33 * which means that they are .equals(). See ICU ticket #11493. 34 */ 35 private static List<Charset> icuCharsets = Collections.<Charset>emptyList(); 36 37 /** 38 * Default constructor 39 * @stable ICU 3.6 40 */ 41 public CharsetProviderICU() { 42 } 43 44 /** 45 * Constructs a Charset for the given charset name. 46 * Implements the abstract method of super class. 47 * @param charsetName charset name 48 * @return Charset object for the given charset name, null if unsupported 49 * @stable ICU 3.6 50 */ 51 @Override 52 public final Charset charsetForName(String charsetName){ 53 try{ 54 // extract the options from the charset name 55 String optionsString = ""; 56 if (charsetName.endsWith(UConverterConstants.OPTION_SWAP_LFNL_STRING)) { 57 /* Remove and save the swap lfnl option string portion of the charset name. */ 58 optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING; 59 charsetName = charsetName.substring(0, charsetName.length() - optionsString.length()); 60 } 61 // get the canonical name 62 String icuCanonicalName = getICUCanonicalName(charsetName); 63 64 // create the converter object and return it 65 if(icuCanonicalName==null || icuCanonicalName.length()==0){ 66 // Try the original name, may be something added and not in the alias table. 67 // Will get an unsupported encoding exception if it doesn't work. 68 icuCanonicalName = charsetName; 69 } 70 return getCharset(icuCanonicalName, optionsString); 71 }catch(UnsupportedCharsetException ex){ 72 }catch(IOException ex){ 73 } 74 return null; 75 } 76 77 /** 78 * Constructs a charset for the given ICU conversion table from the specified class path. 79 * Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>. 80 * In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package. 81 * Conversion tables can be made with ICU4C's makeconv tool. 82 * This function allows you to allows you to load user defined conversion 83 * tables that are outside of ICU's core data. 84 * @param charsetName The name of the charset conversion table. 85 * @param classPath The class path that contain the conversion table. 86 * @return charset object for the given charset name, null if unsupported 87 * @stable ICU 3.8 88 */ 89 public final Charset charsetForName(String charsetName, String classPath) { 90 return charsetForName(charsetName, classPath, null); 91 } 92 93 /** 94 * Constructs a charset for the given ICU conversion table from the specified class path. 95 * This function is similar to {@link #charsetForName(String, String)}. 96 * @param charsetName The name of the charset conversion table. 97 * @param classPath The class path that contain the conversion table. 98 * @param loader the class object from which to load the charset conversion table 99 * @return charset object for the given charset name, null if unsupported 100 * @stable ICU 3.8 101 */ 102 public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) { 103 CharsetMBCS cs = null; 104 try { 105 cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader); 106 } catch (InvalidFormatException e) { 107 // return null; 108 } 109 return cs; 110 } 111 112 /** 113 * Gets the canonical name of the converter as defined by Java 114 * @param enc converter name 115 * @return canonical name of the converter 116 * @internal 117 * @deprecated This API is ICU internal only. 118 */ 119 @Deprecated 120 public static final String getICUCanonicalName(String enc) 121 throws UnsupportedCharsetException{ 122 String canonicalName = null; 123 String ret = null; 124 try{ 125 if(enc!=null){ 126 if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){ 127 ret = canonicalName; 128 } else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){ 129 ret = canonicalName; 130 } else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){ 131 /* we have some aliases in the form x-blah .. match those */ 132 ret = canonicalName; 133 }/*else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){ 134 ret = canonicalName; 135 }*/else if(enc.indexOf("x-")==0 || enc.indexOf("X-")==0){ 136 /* TODO: Match with getJavaCanonicalName method */ 137 /* 138 char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0}; 139 strcpy(temp, encName+2); 140 */ 141 // Remove the 'x-' and get the ICU canonical name 142 if ((canonicalName = UConverterAlias.getAlias(enc.substring(2), 0))!=null) { 143 ret = canonicalName; 144 } else { 145 ret = ""; 146 } 147 148 }else{ 149 /* unsupported encoding */ 150 ret = ""; 151 } 152 } 153 return ret; 154 }catch(IOException ex){ 155 throw new UnsupportedCharsetException(enc); 156 } 157 } 158 private static final Charset getCharset(String icuCanonicalName, String optionsString) 159 throws IOException { 160 String[] aliases = getAliases(icuCanonicalName); 161 String canonicalName = getJavaCanonicalName(icuCanonicalName); 162 163 /* Concat the option string to the icuCanonicalName so that the options can be handled properly 164 * by the actual charset. 165 */ 166 return (CharsetICU.getCharset(icuCanonicalName + optionsString, canonicalName, aliases)); 167 } 168 /** 169 * Gets the canonical name of the converter as defined by Java 170 * @param charsetName converter name 171 * @return canonical name of the converter 172 * @internal 173 * @deprecated This API is ICU internal only. 174 */ 175 @Deprecated 176 public static String getJavaCanonicalName(String charsetName){ 177 /* 178 If a charset listed in the IANA Charset Registry is supported by an implementation 179 of the Java platform then its canonical name must be the name listed in the registry. 180 Many charsets are given more than one name in the registry, in which case the registry 181 identifies one of the names as MIME-preferred. If a charset has more than one registry 182 name then its canonical name must be the MIME-preferred name and the other names in 183 the registry must be valid aliases. If a supported charset is not listed in the IANA 184 registry then its canonical name must begin with one of the strings "X-" or "x-". 185 */ 186 if(charsetName==null ){ 187 return null; 188 } 189 try{ 190 String cName = null; 191 /* find out the alias with MIME tag */ 192 if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){ 193 /* find out the alias with IANA tag */ 194 }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){ 195 }else { 196 /* 197 check to see if an alias already exists with x- prefix, if yes then 198 make that the canonical name 199 */ 200 int aliasNum = UConverterAlias.countAliases(charsetName); 201 String name; 202 for(int i=0;i<aliasNum;i++){ 203 name = UConverterAlias.getAlias(charsetName, i); 204 if(name!=null && name.indexOf("x-")==0){ 205 cName = name; 206 break; 207 } 208 } 209 /* last resort just append x- to any of the alias and 210 make it the canonical name */ 211 if((cName==null || cName.length()==0)){ 212 name = UConverterAlias.getStandardName(charsetName, "UTR22"); 213 if(name==null && charsetName.indexOf(",")!=-1){ 214 name = UConverterAlias.getAlias(charsetName, 1); 215 } 216 /* if there is no UTR22 canonical name .. then just return itself*/ 217 if(name==null){ 218 name = charsetName; 219 } 220 cName = "x-"+ name; 221 } 222 } 223 return cName; 224 }catch (IOException ex){ 225 226 } 227 return null; 228 } 229 230 /** 231 * Gets the aliases associated with the converter name 232 * @param encName converter name 233 * @return converter names as elements in an object array 234 * @internal 235 * @deprecated This API is ICU internal only. 236 */ 237 @Deprecated 238 private static final String[] getAliases(String encName)throws IOException{ 239 String[] ret = null; 240 int aliasNum = 0; 241 int i=0; 242 int j=0; 243 String aliasArray[/*50*/] = new String[50]; 244 245 if(encName != null){ 246 aliasNum = UConverterAlias.countAliases(encName); 247 for(i=0,j=0;i<aliasNum;i++){ 248 String name = UConverterAlias.getAlias(encName,i); 249 if(name.indexOf(',')==-1){ 250 aliasArray[j++]= name; 251 } 252 } 253 ret = new String[j]; 254 for(;--j>=0;) { 255 ret[j] = aliasArray[j]; 256 } 257 258 } 259 return (ret); 260 261 } 262 263 /** 264 * Lazy-init the icuCharsets list. 265 * Could be done during static initialization if constructing all of the Charsets 266 * were cheap enough. See ICU ticket #11481. 267 */ 268 private static final synchronized void loadAvailableICUCharsets() { 269 if (!icuCharsets.isEmpty()) { 270 return; 271 } 272 List<Charset> icucs = new LinkedList<Charset>(); 273 int num = UConverterAlias.countAvailable(); 274 for (int i = 0; i < num; ++i) { 275 String name = UConverterAlias.getAvailableName(i); 276 try { 277 Charset cs = getCharset(name, ""); 278 icucs.add(cs); 279 } catch(UnsupportedCharsetException ex) { 280 } catch(IOException e) { 281 } 282 // add only charsets that can be created! 283 } 284 // Unmodifiable so that charsets().next().remove() cannot change it. 285 icuCharsets = Collections.unmodifiableList(icucs); 286 } 287 288 /** 289 * Returns an iterator for the available ICU Charsets. 290 * Implements the abstract method of super class. 291 * @return the Charset iterator 292 * @stable ICU 3.6 293 */ 294 @Override 295 public final Iterator<Charset> charsets() { 296 loadAvailableICUCharsets(); 297 return icuCharsets.iterator(); 298 } 299 300 /** 301 * Gets the canonical names of available ICU converters 302 * @return array of available converter names 303 * @internal 304 * @deprecated This API is ICU internal only. 305 */ 306 @Deprecated 307 public static final String[] getAvailableNames() { 308 loadAvailableICUCharsets(); 309 String[] names = new String[icuCharsets.size()]; 310 int i = 0; 311 for (Charset cs : icuCharsets) { 312 names[i++] = cs.name(); 313 } 314 return names; 315 } 316 317 /** 318 * Return all names available 319 * @return String[] an array of all available names 320 * @internal 321 * @deprecated This API is ICU internal only. 322 */ 323 @Deprecated 324 public static final String[] getAllNames(){ 325 int num = UConverterAlias.countAvailable(); 326 String[] names = new String[num]; 327 for(int i=0;i<num;i++) { 328 names[i] = UConverterAlias.getAvailableName(i); 329 } 330 return names; 331 } 332 } 333