1 /** 2 ******************************************************************************* 3 * Copyright (C) 2006-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 package com.ibm.icu.charset; 9 10 import java.io.IOException; 11 import java.nio.charset.Charset; 12 import java.nio.charset.UnsupportedCharsetException; 13 import java.nio.charset.spi.CharsetProvider; 14 import java.util.Collections; 15 import java.util.Iterator; 16 import java.util.LinkedList; 17 import java.util.List; 18 19 import com.ibm.icu.impl.InvalidFormatException; 20 21 22 /** 23 * A concrete subclass of CharsetProvider for loading and providing charset converters 24 * in ICU. 25 * @stable ICU 3.6 26 */ 27 public final class CharsetProviderICU extends CharsetProvider{ 28 /** 29 * List of available ICU Charsets, empty during static initialization. 30 * Not a Set or Map, so that we can add different Charset objects with the same name(), 31 * which means that they are .equals(). See ICU ticket #11493. 32 */ 33 private static List<Charset> icuCharsets = Collections.<Charset>emptyList(); 34 35 /** 36 * Default constructor 37 * @stable ICU 3.6 38 */ 39 public CharsetProviderICU() { 40 } 41 42 /** 43 * Constructs a Charset for the given charset name. 44 * Implements the abstract method of super class. 45 * @param charsetName charset name 46 * @return Charset object for the given charset name, null if unsupported 47 * @stable ICU 3.6 48 */ 49 public final Charset charsetForName(String charsetName){ 50 try{ 51 // extract the options from the charset name 52 String optionsString = ""; 53 if (charsetName.endsWith(UConverterConstants.OPTION_SWAP_LFNL_STRING)) { 54 /* Remove and save the swap lfnl option string portion of the charset name. */ 55 optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING; 56 charsetName = charsetName.substring(0, charsetName.length() - optionsString.length()); 57 } 58 // get the canonical name 59 String icuCanonicalName = getICUCanonicalName(charsetName); 60 61 // create the converter object and return it 62 if(icuCanonicalName==null || icuCanonicalName.length()==0){ 63 // Try the original name, may be something added and not in the alias table. 64 // Will get an unsupported encoding exception if it doesn't work. 65 icuCanonicalName = charsetName; 66 } 67 return getCharset(icuCanonicalName, optionsString); 68 }catch(UnsupportedCharsetException ex){ 69 }catch(IOException ex){ 70 } 71 return null; 72 } 73 74 /** 75 * Constructs a charset for the given ICU conversion table from the specified class path. 76 * Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>. 77 * In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package. 78 * Conversion tables can be made with ICU4C's makeconv tool. 79 * This function allows you to allows you to load user defined conversion 80 * tables that are outside of ICU's core data. 81 * @param charsetName The name of the charset conversion table. 82 * @param classPath The class path that contain the conversion table. 83 * @return charset object for the given charset name, null if unsupported 84 * @stable ICU 3.8 85 */ 86 public final Charset charsetForName(String charsetName, String classPath) { 87 return charsetForName(charsetName, classPath, null); 88 } 89 90 /** 91 * Constructs a charset for the given ICU conversion table from the specified class path. 92 * This function is similar to {@link #charsetForName(String, String)}. 93 * @param charsetName The name of the charset conversion table. 94 * @param classPath The class path that contain the conversion table. 95 * @param loader the class object from which to load the charset conversion table 96 * @return charset object for the given charset name, null if unsupported 97 * @stable ICU 3.8 98 */ 99 public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) { 100 CharsetMBCS cs = null; 101 try { 102 cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader); 103 } catch (InvalidFormatException e) { 104 // return null; 105 } 106 return cs; 107 } 108 109 /** 110 * Gets the canonical name of the converter as defined by Java 111 * @param enc converter name 112 * @return canonical name of the converter 113 * @internal 114 * @deprecated This API is ICU internal only. 115 */ 116 @Deprecated 117 public static final String getICUCanonicalName(String enc) 118 throws UnsupportedCharsetException{ 119 String canonicalName = null; 120 String ret = null; 121 try{ 122 if(enc!=null){ 123 if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){ 124 ret = canonicalName; 125 } else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){ 126 ret = canonicalName; 127 } else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){ 128 /* we have some aliases in the form x-blah .. match those */ 129 ret = canonicalName; 130 }/*else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){ 131 ret = canonicalName; 132 }*/else if(enc.indexOf("x-")==0 || enc.indexOf("X-")==0){ 133 /* TODO: Match with getJavaCanonicalName method */ 134 /* 135 char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0}; 136 strcpy(temp, encName+2); 137 */ 138 // Remove the 'x-' and get the ICU canonical name 139 if ((canonicalName = UConverterAlias.getAlias(enc.substring(2), 0))!=null) { 140 ret = canonicalName; 141 } else { 142 ret = ""; 143 } 144 145 }else{ 146 /* unsupported encoding */ 147 ret = ""; 148 } 149 } 150 return ret; 151 }catch(IOException ex){ 152 throw new UnsupportedCharsetException(enc); 153 } 154 } 155 private static final Charset getCharset(String icuCanonicalName, String optionsString) 156 throws IOException { 157 String[] aliases = getAliases(icuCanonicalName); 158 String canonicalName = getJavaCanonicalName(icuCanonicalName); 159 160 /* Concat the option string to the icuCanonicalName so that the options can be handled properly 161 * by the actual charset. 162 */ 163 return (CharsetICU.getCharset(icuCanonicalName + optionsString, canonicalName, aliases)); 164 } 165 /** 166 * Gets the canonical name of the converter as defined by Java 167 * @param charsetName converter name 168 * @return canonical name of the converter 169 * @internal 170 * @deprecated This API is ICU internal only. 171 */ 172 @Deprecated 173 public static String getJavaCanonicalName(String charsetName){ 174 /* 175 If a charset listed in the IANA Charset Registry is supported by an implementation 176 of the Java platform then its canonical name must be the name listed in the registry. 177 Many charsets are given more than one name in the registry, in which case the registry 178 identifies one of the names as MIME-preferred. If a charset has more than one registry 179 name then its canonical name must be the MIME-preferred name and the other names in 180 the registry must be valid aliases. If a supported charset is not listed in the IANA 181 registry then its canonical name must begin with one of the strings "X-" or "x-". 182 */ 183 if(charsetName==null ){ 184 return null; 185 } 186 try{ 187 String cName = null; 188 /* find out the alias with MIME tag */ 189 if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){ 190 /* find out the alias with IANA tag */ 191 }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){ 192 }else { 193 /* 194 check to see if an alias already exists with x- prefix, if yes then 195 make that the canonical name 196 */ 197 int aliasNum = UConverterAlias.countAliases(charsetName); 198 String name; 199 for(int i=0;i<aliasNum;i++){ 200 name = UConverterAlias.getAlias(charsetName, i); 201 if(name!=null && name.indexOf("x-")==0){ 202 cName = name; 203 break; 204 } 205 } 206 /* last resort just append x- to any of the alias and 207 make it the canonical name */ 208 if((cName==null || cName.length()==0)){ 209 name = UConverterAlias.getStandardName(charsetName, "UTR22"); 210 if(name==null && charsetName.indexOf(",")!=-1){ 211 name = UConverterAlias.getAlias(charsetName, 1); 212 } 213 /* if there is no UTR22 canonical name .. then just return itself*/ 214 if(name==null){ 215 name = charsetName; 216 } 217 cName = "x-"+ name; 218 } 219 } 220 return cName; 221 }catch (IOException ex){ 222 223 } 224 return null; 225 } 226 227 /** 228 * Gets the aliases associated with the converter name 229 * @param encName converter name 230 * @return converter names as elements in an object array 231 * @internal 232 * @deprecated This API is ICU internal only. 233 */ 234 @Deprecated 235 private static final String[] getAliases(String encName)throws IOException{ 236 String[] ret = null; 237 int aliasNum = 0; 238 int i=0; 239 int j=0; 240 String aliasArray[/*50*/] = new String[50]; 241 242 if(encName != null){ 243 aliasNum = UConverterAlias.countAliases(encName); 244 for(i=0,j=0;i<aliasNum;i++){ 245 String name = UConverterAlias.getAlias(encName,i); 246 if(name.indexOf(',')==-1){ 247 aliasArray[j++]= name; 248 } 249 } 250 ret = new String[j]; 251 for(;--j>=0;) { 252 ret[j] = aliasArray[j]; 253 } 254 255 } 256 return (ret); 257 258 } 259 260 /** 261 * Lazy-init the icuCharsets list. 262 * Could be done during static initialization if constructing all of the Charsets 263 * were cheap enough. See ICU ticket #11481. 264 */ 265 private static final synchronized void loadAvailableICUCharsets() { 266 if (!icuCharsets.isEmpty()) { 267 return; 268 } 269 List<Charset> icucs = new LinkedList<Charset>(); 270 int num = UConverterAlias.countAvailable(); 271 for (int i = 0; i < num; ++i) { 272 String name = UConverterAlias.getAvailableName(i); 273 try { 274 Charset cs = getCharset(name, ""); 275 icucs.add(cs); 276 } catch(UnsupportedCharsetException ex) { 277 } catch(IOException e) { 278 } 279 // add only charsets that can be created! 280 } 281 // Unmodifiable so that charsets().next().remove() cannot change it. 282 icuCharsets = Collections.unmodifiableList(icucs); 283 } 284 285 /** 286 * Returns an iterator for the available ICU Charsets. 287 * Implements the abstract method of super class. 288 * @return the Charset iterator 289 * @stable ICU 3.6 290 */ 291 public final Iterator<Charset> charsets() { 292 loadAvailableICUCharsets(); 293 return icuCharsets.iterator(); 294 } 295 296 /** 297 * Gets the canonical names of available ICU converters 298 * @return array of available converter names 299 * @internal 300 * @deprecated This API is ICU internal only. 301 */ 302 @Deprecated 303 public static final String[] getAvailableNames() { 304 loadAvailableICUCharsets(); 305 String[] names = new String[icuCharsets.size()]; 306 int i = 0; 307 for (Charset cs : icuCharsets) { 308 names[i++] = cs.name(); 309 } 310 return names; 311 } 312 313 /** 314 * Return all names available 315 * @return String[] an array of all available names 316 * @internal 317 * @deprecated This API is ICU internal only. 318 */ 319 @Deprecated 320 public static final String[] getAllNames(){ 321 int num = UConverterAlias.countAvailable(); 322 String[] names = new String[num]; 323 for(int i=0;i<num;i++) { 324 names[i] = UConverterAlias.getAvailableName(i); 325 } 326 return names; 327 } 328 } 329