Home | History | Annotate | Download | only in charset
      1 /**
      2 *******************************************************************************
      3 * Copyright (C) 2006-2015, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 
      8 package com.ibm.icu.charset;
      9 
     10 import java.io.IOException;
     11 import java.nio.charset.Charset;
     12 import java.nio.charset.UnsupportedCharsetException;
     13 import java.nio.charset.spi.CharsetProvider;
     14 import java.util.Collections;
     15 import java.util.Iterator;
     16 import java.util.LinkedList;
     17 import java.util.List;
     18 
     19 import com.ibm.icu.impl.InvalidFormatException;
     20 
     21 
     22 /**
     23  * A concrete subclass of CharsetProvider for loading and providing charset converters
     24  * in ICU.
     25  * @stable ICU 3.6
     26  */
     27 public final class CharsetProviderICU extends CharsetProvider{
     28     /**
     29      * List of available ICU Charsets, empty during static initialization.
     30      * Not a Set or Map, so that we can add different Charset objects with the same name(),
     31      * which means that they are .equals(). See ICU ticket #11493.
     32      */
     33     private static List<Charset> icuCharsets = Collections.<Charset>emptyList();
     34 
     35     /**
     36      * Default constructor
     37      * @stable ICU 3.6
     38      */
     39     public CharsetProviderICU() {
     40     }
     41 
     42     /**
     43      * Constructs a Charset for the given charset name.
     44      * Implements the abstract method of super class.
     45      * @param charsetName charset name
     46      * @return Charset object for the given charset name, null if unsupported
     47      * @stable ICU 3.6
     48      */
     49     public final Charset charsetForName(String charsetName){
     50         try{
     51             // extract the options from the charset name
     52             String optionsString = "";
     53             if (charsetName.endsWith(UConverterConstants.OPTION_SWAP_LFNL_STRING)) {
     54                 /* Remove and save the swap lfnl option string portion of the charset name. */
     55                 optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING;
     56                 charsetName = charsetName.substring(0, charsetName.length() - optionsString.length());
     57             }
     58             // get the canonical name
     59             String icuCanonicalName = getICUCanonicalName(charsetName);
     60 
     61             // create the converter object and return it
     62             if(icuCanonicalName==null || icuCanonicalName.length()==0){
     63                 // Try the original name, may be something added and not in the alias table.
     64                 // Will get an unsupported encoding exception if it doesn't work.
     65                 icuCanonicalName = charsetName;
     66             }
     67             return getCharset(icuCanonicalName, optionsString);
     68         }catch(UnsupportedCharsetException ex){
     69         }catch(IOException ex){
     70         }
     71         return null;
     72     }
     73 
     74     /**
     75      * Constructs a charset for the given ICU conversion table from the specified class path.
     76      * Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>.
     77      * In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package.
     78      * Conversion tables can be made with ICU4C's makeconv tool.
     79      * This function allows you to allows you to load user defined conversion
     80      * tables that are outside of ICU's core data.
     81      * @param charsetName The name of the charset conversion table.
     82      * @param classPath The class path that contain the conversion table.
     83      * @return charset object for the given charset name, null if unsupported
     84      * @stable ICU 3.8
     85      */
     86     public final Charset charsetForName(String charsetName, String classPath) {
     87         return charsetForName(charsetName, classPath, null);
     88     }
     89 
     90     /**
     91      * Constructs a charset for the given ICU conversion table from the specified class path.
     92      * This function is similar to {@link #charsetForName(String, String)}.
     93      * @param charsetName The name of the charset conversion table.
     94      * @param classPath The class path that contain the conversion table.
     95      * @param loader the class object from which to load the charset conversion table
     96      * @return charset object for the given charset name, null if unsupported
     97      * @stable ICU 3.8
     98      */
     99     public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) {
    100         CharsetMBCS cs = null;
    101         try {
    102              cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader);
    103         } catch (InvalidFormatException e) {
    104             // return null;
    105         }
    106         return cs;
    107     }
    108 
    109     /**
    110      * Gets the canonical name of the converter as defined by Java
    111      * @param enc converter name
    112      * @return canonical name of the converter
    113      * @internal
    114      * @deprecated This API is ICU internal only.
    115      */
    116      @Deprecated
    117      public static final String getICUCanonicalName(String enc)
    118                                 throws UnsupportedCharsetException{
    119         String canonicalName = null;
    120         String ret = null;
    121         try{
    122             if(enc!=null){
    123                  if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){
    124                     ret = canonicalName;
    125                 } else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){
    126                     ret = canonicalName;
    127                 } else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){
    128                     /* we have some aliases in the form x-blah .. match those */
    129                     ret = canonicalName;
    130                 }/*else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){
    131                     ret = canonicalName;
    132                 }*/else if(enc.indexOf("x-")==0 || enc.indexOf("X-")==0){
    133                     /* TODO: Match with getJavaCanonicalName method */
    134                     /*
    135                     char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0};
    136                     strcpy(temp, encName+2);
    137                     */
    138                     // Remove the 'x-' and get the ICU canonical name
    139                     if ((canonicalName = UConverterAlias.getAlias(enc.substring(2), 0))!=null) {
    140                         ret = canonicalName;
    141                     } else {
    142                         ret = "";
    143                     }
    144 
    145                 }else{
    146                     /* unsupported encoding */
    147                    ret = "";
    148                 }
    149             }
    150             return ret;
    151         }catch(IOException ex){
    152             throw new UnsupportedCharsetException(enc);
    153         }
    154     }
    155     private static final Charset getCharset(String icuCanonicalName, String optionsString)
    156             throws IOException {
    157        String[] aliases = getAliases(icuCanonicalName);
    158        String canonicalName = getJavaCanonicalName(icuCanonicalName);
    159 
    160        /* Concat the option string to the icuCanonicalName so that the options can be handled properly
    161         * by the actual charset.
    162         */
    163        return (CharsetICU.getCharset(icuCanonicalName + optionsString, canonicalName, aliases));
    164     }
    165     /**
    166      * Gets the canonical name of the converter as defined by Java
    167      * @param charsetName converter name
    168      * @return canonical name of the converter
    169      * @internal
    170      * @deprecated This API is ICU internal only.
    171      */
    172     @Deprecated
    173     public static String getJavaCanonicalName(String charsetName){
    174         /*
    175         If a charset listed in the IANA Charset Registry is supported by an implementation
    176         of the Java platform then its canonical name must be the name listed in the registry.
    177         Many charsets are given more than one name in the registry, in which case the registry
    178         identifies one of the names as MIME-preferred. If a charset has more than one registry
    179         name then its canonical name must be the MIME-preferred name and the other names in
    180         the registry must be valid aliases. If a supported charset is not listed in the IANA
    181         registry then its canonical name must begin with one of the strings "X-" or "x-".
    182         */
    183         if(charsetName==null ){
    184             return null;
    185         }
    186         try{
    187             String cName = null;
    188             /* find out the alias with MIME tag */
    189             if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){
    190             /* find out the alias with IANA tag */
    191             }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){
    192             }else {
    193                 /*
    194                     check to see if an alias already exists with x- prefix, if yes then
    195                     make that the canonical name
    196                 */
    197                 int aliasNum = UConverterAlias.countAliases(charsetName);
    198                 String name;
    199                 for(int i=0;i<aliasNum;i++){
    200                     name = UConverterAlias.getAlias(charsetName, i);
    201                     if(name!=null && name.indexOf("x-")==0){
    202                         cName = name;
    203                         break;
    204                     }
    205                 }
    206                 /* last resort just append x- to any of the alias and
    207                 make it the canonical name */
    208                 if((cName==null || cName.length()==0)){
    209                     name = UConverterAlias.getStandardName(charsetName, "UTR22");
    210                     if(name==null && charsetName.indexOf(",")!=-1){
    211                         name = UConverterAlias.getAlias(charsetName, 1);
    212                     }
    213                     /* if there is no UTR22 canonical name .. then just return itself*/
    214                     if(name==null){
    215                         name = charsetName;
    216                     }
    217                     cName = "x-"+ name;
    218                 }
    219             }
    220             return cName;
    221         }catch (IOException ex){
    222 
    223         }
    224         return null;
    225      }
    226 
    227     /**
    228      * Gets the aliases associated with the converter name
    229      * @param encName converter name
    230      * @return converter names as elements in an object array
    231      * @internal
    232      * @deprecated This API is ICU internal only.
    233      */
    234     @Deprecated
    235     private static final String[] getAliases(String encName)throws IOException{
    236         String[] ret = null;
    237         int aliasNum = 0;
    238         int i=0;
    239         int j=0;
    240         String aliasArray[/*50*/] = new String[50];
    241 
    242         if(encName != null){
    243             aliasNum = UConverterAlias.countAliases(encName);
    244             for(i=0,j=0;i<aliasNum;i++){
    245                 String name = UConverterAlias.getAlias(encName,i);
    246                 if(name.indexOf(',')==-1){
    247                     aliasArray[j++]= name;
    248                 }
    249             }
    250             ret = new String[j];
    251             for(;--j>=0;) {
    252                 ret[j] = aliasArray[j];
    253             }
    254 
    255         }
    256         return (ret);
    257 
    258     }
    259 
    260     /**
    261      * Lazy-init the icuCharsets list.
    262      * Could be done during static initialization if constructing all of the Charsets
    263      * were cheap enough. See ICU ticket #11481.
    264      */
    265     private static final synchronized void loadAvailableICUCharsets() {
    266         if (!icuCharsets.isEmpty()) {
    267             return;
    268         }
    269         List<Charset> icucs = new LinkedList<Charset>();
    270         int num = UConverterAlias.countAvailable();
    271         for (int i = 0; i < num; ++i) {
    272             String name = UConverterAlias.getAvailableName(i);
    273             try {
    274                 Charset cs = getCharset(name, "");
    275                 icucs.add(cs);
    276             } catch(UnsupportedCharsetException ex) {
    277             } catch(IOException e) {
    278             }
    279             // add only charsets that can be created!
    280         }
    281         // Unmodifiable so that charsets().next().remove() cannot change it.
    282         icuCharsets = Collections.unmodifiableList(icucs);
    283     }
    284 
    285     /**
    286      * Returns an iterator for the available ICU Charsets.
    287      * Implements the abstract method of super class.
    288      * @return the Charset iterator
    289      * @stable ICU 3.6
    290      */
    291     public final Iterator<Charset> charsets() {
    292         loadAvailableICUCharsets();
    293         return icuCharsets.iterator();
    294     }
    295 
    296     /**
    297      * Gets the canonical names of available ICU converters
    298      * @return array of available converter names
    299      * @internal
    300      * @deprecated This API is ICU internal only.
    301      */
    302     @Deprecated
    303      public static final String[] getAvailableNames() {
    304         loadAvailableICUCharsets();
    305         String[] names = new String[icuCharsets.size()];
    306         int i = 0;
    307         for (Charset cs : icuCharsets) {
    308             names[i++] = cs.name();
    309         }
    310         return names;
    311     }
    312 
    313     /**
    314      * Return all names available
    315      * @return String[] an array of all available names
    316      * @internal
    317      * @deprecated This API is ICU internal only.
    318      */
    319     @Deprecated
    320      public static final String[] getAllNames(){
    321         int num = UConverterAlias.countAvailable();
    322         String[] names = new String[num];
    323         for(int i=0;i<num;i++) {
    324             names[i] = UConverterAlias.getAvailableName(i);
    325         }
    326         return names;
    327     }
    328 }
    329