Home | History | Annotate | Download | only in util
      1 /****************************************************************
      2  * Licensed to the Apache Software Foundation (ASF) under one   *
      3  * or more contributor license agreements.  See the NOTICE file *
      4  * distributed with this work for additional information        *
      5  * regarding copyright ownership.  The ASF licenses this file   *
      6  * to you under the Apache License, Version 2.0 (the            *
      7  * "License"); you may not use this file except in compliance   *
      8  * with the License.  You may obtain a copy of the License at   *
      9  *                                                              *
     10  *   http://www.apache.org/licenses/LICENSE-2.0                 *
     11  *                                                              *
     12  * Unless required by applicable law or agreed to in writing,   *
     13  * software distributed under the License is distributed on an  *
     14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
     15  * KIND, either express or implied.  See the License for the    *
     16  * specific language governing permissions and limitations      *
     17  * under the License.                                           *
     18  ****************************************************************/
     19 
     20 package org.apache.james.mime4j.util;
     21 
     22 import java.io.UnsupportedEncodingException;
     23 import java.nio.charset.IllegalCharsetNameException;
     24 import java.nio.charset.UnsupportedCharsetException;
     25 import java.util.HashMap;
     26 import java.util.Locale;
     27 import java.util.TreeSet;
     28 
     29 //BEGIN android-changed: Stubbing out logging
     30 import org.apache.james.mime4j.Log;
     31 import org.apache.james.mime4j.LogFactory;
     32 //END android-changed
     33 
     34 /**
 * Utility class for working with character sets. It is somewhat similar to
 * the Java 1.4 <code>java.nio.charset.Charset</code> class but knows many
 * more aliases. It was originally written to be compatible with Java 1.3;
 * note that the code now uses generics, so Java 5 or later is required to
 * compile it. It uses a simple detection
 * mechanism to detect what character sets the current VM supports. This will
     39  * be a sub-set of the character sets listed in the
     40  * <a href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">
     41  * Java 1.5 (J2SE5.0) Supported Encodings</a> document.
     42  * <p>
     43  * The <a href="http://www.iana.org/assignments/character-sets">
     44  * IANA Character Sets</a> document has been used to determine the preferred
     45  * MIME character set names and to get a list of known aliases.
     46  * <p>
     47  * This is a complete list of the character sets known to this class:
     48  * <table>
     49  *     <tr>
     50  *         <td>Canonical (Java) name</td>
     51  *         <td>MIME preferred</td>
     52  *         <td>Aliases</td>
     53  *     </tr>
     54  *     <tr>
     55  *         <td>ASCII</td>
     56  *         <td>US-ASCII</td>
     57  *         <td>ANSI_X3.4-1968 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ISO646-US us IBM367 cp367 csASCII ascii7 646 iso_646.irv:1983 </td>
     58  *     </tr>
     59  *     <tr>
     60  *         <td>Big5</td>
     61  *         <td>Big5</td>
     62  *         <td>csBig5 CN-Big5 BIG-FIVE BIGFIVE </td>
     63  *     </tr>
     64  *     <tr>
     65  *         <td>Big5_HKSCS</td>
     66  *         <td>Big5-HKSCS</td>
     67  *         <td>big5hkscs </td>
     68  *     </tr>
     69  *     <tr>
     70  *         <td>Big5_Solaris</td>
     71  *         <td>?</td>
     72  *         <td></td>
     73  *     </tr>
     74  *     <tr>
     75  *         <td>Cp037</td>
     76  *         <td>IBM037</td>
     77  *         <td>ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 </td>
     78  *     </tr>
     79  *     <tr>
     80  *         <td>Cp1006</td>
     81  *         <td>?</td>
     82  *         <td></td>
     83  *     </tr>
     84  *     <tr>
     85  *         <td>Cp1025</td>
     86  *         <td>?</td>
     87  *         <td></td>
     88  *     </tr>
     89  *     <tr>
     90  *         <td>Cp1026</td>
     91  *         <td>IBM1026</td>
     92  *         <td>csIBM1026 </td>
     93  *     </tr>
     94  *     <tr>
     95  *         <td>Cp1046</td>
     96  *         <td>?</td>
     97  *         <td></td>
     98  *     </tr>
     99  *     <tr>
    100  *         <td>Cp1047</td>
    101  *         <td>IBM1047</td>
    102  *         <td>IBM-1047 </td>
    103  *     </tr>
    104  *     <tr>
    105  *         <td>Cp1097</td>
    106  *         <td>?</td>
    107  *         <td></td>
    108  *     </tr>
    109  *     <tr>
    110  *         <td>Cp1098</td>
    111  *         <td>?</td>
    112  *         <td></td>
    113  *     </tr>
    114  *     <tr>
    115  *         <td>Cp1112</td>
    116  *         <td>?</td>
    117  *         <td></td>
    118  *     </tr>
    119  *     <tr>
    120  *         <td>Cp1122</td>
    121  *         <td>?</td>
    122  *         <td></td>
    123  *     </tr>
    124  *     <tr>
    125  *         <td>Cp1123</td>
    126  *         <td>?</td>
    127  *         <td></td>
    128  *     </tr>
    129  *     <tr>
    130  *         <td>Cp1124</td>
    131  *         <td>?</td>
    132  *         <td></td>
    133  *     </tr>
    134  *     <tr>
    135  *         <td>Cp1140</td>
    136  *         <td>IBM01140</td>
    137  *         <td>CCSID01140 CP01140 ebcdic-us-37+euro </td>
    138  *     </tr>
    139  *     <tr>
    140  *         <td>Cp1141</td>
    141  *         <td>IBM01141</td>
    142  *         <td>CCSID01141 CP01141 ebcdic-de-273+euro </td>
    143  *     </tr>
    144  *     <tr>
    145  *         <td>Cp1142</td>
    146  *         <td>IBM01142</td>
    147  *         <td>CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro </td>
    148  *     </tr>
    149  *     <tr>
    150  *         <td>Cp1143</td>
    151  *         <td>IBM01143</td>
    152  *         <td>CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro </td>
    153  *     </tr>
    154  *     <tr>
    155  *         <td>Cp1144</td>
    156  *         <td>IBM01144</td>
    157  *         <td>CCSID01144 CP01144 ebcdic-it-280+euro </td>
    158  *     </tr>
    159  *     <tr>
    160  *         <td>Cp1145</td>
    161  *         <td>IBM01145</td>
    162  *         <td>CCSID01145 CP01145 ebcdic-es-284+euro </td>
    163  *     </tr>
    164  *     <tr>
    165  *         <td>Cp1146</td>
    166  *         <td>IBM01146</td>
    167  *         <td>CCSID01146 CP01146 ebcdic-gb-285+euro </td>
    168  *     </tr>
    169  *     <tr>
    170  *         <td>Cp1147</td>
    171  *         <td>IBM01147</td>
    172  *         <td>CCSID01147 CP01147 ebcdic-fr-297+euro </td>
    173  *     </tr>
    174  *     <tr>
    175  *         <td>Cp1148</td>
    176  *         <td>IBM01148</td>
    177  *         <td>CCSID01148 CP01148 ebcdic-international-500+euro </td>
    178  *     </tr>
    179  *     <tr>
    180  *         <td>Cp1149</td>
    181  *         <td>IBM01149</td>
    182  *         <td>CCSID01149 CP01149 ebcdic-is-871+euro </td>
    183  *     </tr>
    184  *     <tr>
    185  *         <td>Cp1250</td>
    186  *         <td>windows-1250</td>
    187  *         <td></td>
    188  *     </tr>
    189  *     <tr>
    190  *         <td>Cp1251</td>
    191  *         <td>windows-1251</td>
    192  *         <td></td>
    193  *     </tr>
    194  *     <tr>
    195  *         <td>Cp1252</td>
    196  *         <td>windows-1252</td>
    197  *         <td></td>
    198  *     </tr>
    199  *     <tr>
    200  *         <td>Cp1253</td>
    201  *         <td>windows-1253</td>
    202  *         <td></td>
    203  *     </tr>
    204  *     <tr>
    205  *         <td>Cp1254</td>
    206  *         <td>windows-1254</td>
    207  *         <td></td>
    208  *     </tr>
    209  *     <tr>
    210  *         <td>Cp1255</td>
    211  *         <td>windows-1255</td>
    212  *         <td></td>
    213  *     </tr>
    214  *     <tr>
    215  *         <td>Cp1256</td>
    216  *         <td>windows-1256</td>
    217  *         <td></td>
    218  *     </tr>
    219  *     <tr>
    220  *         <td>Cp1257</td>
    221  *         <td>windows-1257</td>
    222  *         <td></td>
    223  *     </tr>
    224  *     <tr>
    225  *         <td>Cp1258</td>
    226  *         <td>windows-1258</td>
    227  *         <td></td>
    228  *     </tr>
    229  *     <tr>
    230  *         <td>Cp1381</td>
    231  *         <td>?</td>
    232  *         <td></td>
    233  *     </tr>
    234  *     <tr>
    235  *         <td>Cp1383</td>
    236  *         <td>?</td>
    237  *         <td></td>
    238  *     </tr>
    239  *     <tr>
    240  *         <td>Cp273</td>
    241  *         <td>IBM273</td>
    242  *         <td>csIBM273 </td>
    243  *     </tr>
    244  *     <tr>
    245  *         <td>Cp277</td>
    246  *         <td>IBM277</td>
    247  *         <td>EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 </td>
    248  *     </tr>
    249  *     <tr>
    250  *         <td>Cp278</td>
    251  *         <td>IBM278</td>
    252  *         <td>CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278 </td>
    253  *     </tr>
    254  *     <tr>
    255  *         <td>Cp280</td>
    256  *         <td>IBM280</td>
    257  *         <td>ebcdic-cp-it csIBM280 </td>
    258  *     </tr>
    259  *     <tr>
    260  *         <td>Cp284</td>
    261  *         <td>IBM284</td>
    262  *         <td>ebcdic-cp-es csIBM284 </td>
    263  *     </tr>
    264  *     <tr>
    265  *         <td>Cp285</td>
    266  *         <td>IBM285</td>
    267  *         <td>ebcdic-cp-gb csIBM285 </td>
    268  *     </tr>
    269  *     <tr>
    270  *         <td>Cp297</td>
    271  *         <td>IBM297</td>
    272  *         <td>ebcdic-cp-fr csIBM297 </td>
    273  *     </tr>
    274  *     <tr>
    275  *         <td>Cp33722</td>
    276  *         <td>?</td>
    277  *         <td></td>
    278  *     </tr>
    279  *     <tr>
    280  *         <td>Cp420</td>
    281  *         <td>IBM420</td>
    282  *         <td>ebcdic-cp-ar1 csIBM420 </td>
    283  *     </tr>
    284  *     <tr>
    285  *         <td>Cp424</td>
    286  *         <td>IBM424</td>
    287  *         <td>ebcdic-cp-he csIBM424 </td>
    288  *     </tr>
    289  *     <tr>
    290  *         <td>Cp437</td>
    291  *         <td>IBM437</td>
    292  *         <td>437 csPC8CodePage437 </td>
    293  *     </tr>
    294  *     <tr>
    295  *         <td>Cp500</td>
    296  *         <td>IBM500</td>
    297  *         <td>ebcdic-cp-be ebcdic-cp-ch csIBM500 </td>
    298  *     </tr>
    299  *     <tr>
    300  *         <td>Cp737</td>
    301  *         <td>?</td>
    302  *         <td></td>
    303  *     </tr>
    304  *     <tr>
    305  *         <td>Cp775</td>
    306  *         <td>IBM775</td>
    307  *         <td>csPC775Baltic </td>
    308  *     </tr>
    309  *     <tr>
    310  *         <td>Cp838</td>
    311  *         <td>IBM-Thai</td>
    312  *         <td></td>
    313  *     </tr>
    314  *     <tr>
    315  *         <td>Cp850</td>
    316  *         <td>IBM850</td>
    317  *         <td>850 csPC850Multilingual </td>
    318  *     </tr>
    319  *     <tr>
    320  *         <td>Cp852</td>
    321  *         <td>IBM852</td>
    322  *         <td>852 csPCp852 </td>
    323  *     </tr>
    324  *     <tr>
    325  *         <td>Cp855</td>
    326  *         <td>IBM855</td>
    327  *         <td>855 csIBM855 </td>
    328  *     </tr>
    329  *     <tr>
    330  *         <td>Cp856</td>
    331  *         <td>?</td>
    332  *         <td></td>
    333  *     </tr>
    334  *     <tr>
    335  *         <td>Cp857</td>
    336  *         <td>IBM857</td>
    337  *         <td>857 csIBM857 </td>
    338  *     </tr>
    339  *     <tr>
    340  *         <td>Cp858</td>
    341  *         <td>IBM00858</td>
    342  *         <td>CCSID00858 CP00858 PC-Multilingual-850+euro </td>
    343  *     </tr>
    344  *     <tr>
    345  *         <td>Cp860</td>
    346  *         <td>IBM860</td>
    347  *         <td>860 csIBM860 </td>
    348  *     </tr>
    349  *     <tr>
    350  *         <td>Cp861</td>
    351  *         <td>IBM861</td>
    352  *         <td>861 cp-is csIBM861 </td>
    353  *     </tr>
    354  *     <tr>
    355  *         <td>Cp862</td>
    356  *         <td>IBM862</td>
    357  *         <td>862 csPC862LatinHebrew </td>
    358  *     </tr>
    359  *     <tr>
    360  *         <td>Cp863</td>
    361  *         <td>IBM863</td>
    362  *         <td>863 csIBM863 </td>
    363  *     </tr>
    364  *     <tr>
    365  *         <td>Cp864</td>
    366  *         <td>IBM864</td>
    367  *         <td>cp864 csIBM864 </td>
    368  *     </tr>
    369  *     <tr>
    370  *         <td>Cp865</td>
    371  *         <td>IBM865</td>
    372  *         <td>865 csIBM865 </td>
    373  *     </tr>
    374  *     <tr>
    375  *         <td>Cp866</td>
    376  *         <td>IBM866</td>
    377  *         <td>866 csIBM866 </td>
    378  *     </tr>
    379  *     <tr>
    380  *         <td>Cp868</td>
    381  *         <td>IBM868</td>
    382  *         <td>cp-ar csIBM868 </td>
    383  *     </tr>
    384  *     <tr>
    385  *         <td>Cp869</td>
    386  *         <td>IBM869</td>
    387  *         <td>cp-gr csIBM869 </td>
    388  *     </tr>
    389  *     <tr>
    390  *         <td>Cp870</td>
    391  *         <td>IBM870</td>
    392  *         <td>ebcdic-cp-roece ebcdic-cp-yu csIBM870 </td>
    393  *     </tr>
    394  *     <tr>
    395  *         <td>Cp871</td>
    396  *         <td>IBM871</td>
    397  *         <td>ebcdic-cp-is csIBM871 </td>
    398  *     </tr>
    399  *     <tr>
    400  *         <td>Cp875</td>
    401  *         <td>?</td>
    402  *         <td></td>
    403  *     </tr>
    404  *     <tr>
    405  *         <td>Cp918</td>
    406  *         <td>IBM918</td>
    407  *         <td>ebcdic-cp-ar2 csIBM918 </td>
    408  *     </tr>
    409  *     <tr>
    410  *         <td>Cp921</td>
    411  *         <td>?</td>
    412  *         <td></td>
    413  *     </tr>
    414  *     <tr>
    415  *         <td>Cp922</td>
    416  *         <td>?</td>
    417  *         <td></td>
    418  *     </tr>
    419  *     <tr>
    420  *         <td>Cp930</td>
    421  *         <td>?</td>
    422  *         <td></td>
    423  *     </tr>
    424  *     <tr>
    425  *         <td>Cp933</td>
    426  *         <td>?</td>
    427  *         <td></td>
    428  *     </tr>
    429  *     <tr>
    430  *         <td>Cp935</td>
    431  *         <td>?</td>
    432  *         <td></td>
    433  *     </tr>
    434  *     <tr>
    435  *         <td>Cp937</td>
    436  *         <td>?</td>
    437  *         <td></td>
    438  *     </tr>
    439  *     <tr>
    440  *         <td>Cp939</td>
    441  *         <td>?</td>
    442  *         <td></td>
    443  *     </tr>
    444  *     <tr>
    445  *         <td>Cp942</td>
    446  *         <td>?</td>
    447  *         <td></td>
    448  *     </tr>
    449  *     <tr>
    450  *         <td>Cp942C</td>
    451  *         <td>?</td>
    452  *         <td></td>
    453  *     </tr>
    454  *     <tr>
    455  *         <td>Cp943</td>
    456  *         <td>?</td>
    457  *         <td></td>
    458  *     </tr>
    459  *     <tr>
    460  *         <td>Cp943C</td>
    461  *         <td>?</td>
    462  *         <td></td>
    463  *     </tr>
    464  *     <tr>
    465  *         <td>Cp948</td>
    466  *         <td>?</td>
    467  *         <td></td>
    468  *     </tr>
    469  *     <tr>
    470  *         <td>Cp949</td>
    471  *         <td>?</td>
    472  *         <td></td>
    473  *     </tr>
    474  *     <tr>
    475  *         <td>Cp949C</td>
    476  *         <td>?</td>
    477  *         <td></td>
    478  *     </tr>
    479  *     <tr>
    480  *         <td>Cp950</td>
    481  *         <td>?</td>
    482  *         <td></td>
    483  *     </tr>
    484  *     <tr>
    485  *         <td>Cp964</td>
    486  *         <td>?</td>
    487  *         <td></td>
    488  *     </tr>
    489  *     <tr>
    490  *         <td>Cp970</td>
    491  *         <td>?</td>
    492  *         <td></td>
    493  *     </tr>
    494  *     <tr>
    495  *         <td>EUC_CN</td>
    496  *         <td>GB2312</td>
    497  *         <td>x-EUC-CN csGB2312 euccn euc-cn gb2312-80 gb2312-1980 CN-GB CN-GB-ISOIR165 </td>
    498  *     </tr>
    499  *     <tr>
    500  *         <td>EUC_JP</td>
    501  *         <td>EUC-JP</td>
    502  *         <td>csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese eucjis x-eucjp eucjp x-euc-jp </td>
    503  *     </tr>
    504  *     <tr>
    505  *         <td>EUC_JP_LINUX</td>
    506  *         <td>?</td>
    507  *         <td></td>
    508  *     </tr>
    509  *     <tr>
    510  *         <td>EUC_JP_Solaris</td>
    511  *         <td>?</td>
    512  *         <td></td>
    513  *     </tr>
    514  *     <tr>
    515  *         <td>EUC_KR</td>
    516  *         <td>EUC-KR</td>
    517  *         <td>csEUCKR ksc5601 5601 ksc5601_1987 ksc_5601 ksc5601-1987 ks_c_5601-1987 euckr </td>
    518  *     </tr>
    519  *     <tr>
    520  *         <td>EUC_TW</td>
    521  *         <td>EUC-TW</td>
    522  *         <td>x-EUC-TW cns11643 euctw </td>
    523  *     </tr>
    524  *     <tr>
    525  *         <td>GB18030</td>
    526  *         <td>GB18030</td>
    527  *         <td>gb18030-2000 </td>
    528  *     </tr>
    529  *     <tr>
    530  *         <td>GBK</td>
    531  *         <td>windows-936</td>
    532  *         <td>CP936 MS936 ms_936 x-mswin-936 </td>
    533  *     </tr>
    534  *     <tr>
    535  *         <td>ISCII91</td>
    536  *         <td>?</td>
    537  *         <td>x-ISCII91 iscii </td>
    538  *     </tr>
    539  *     <tr>
    540  *         <td>ISO2022CN</td>
    541  *         <td>ISO-2022-CN</td>
    542  *         <td></td>
    543  *     </tr>
    544  *     <tr>
    545  *         <td>ISO2022JP</td>
    546  *         <td>ISO-2022-JP</td>
    547  *         <td>csISO2022JP JIS jis_encoding csjisencoding </td>
    548  *     </tr>
    549  *     <tr>
    550  *         <td>ISO2022KR</td>
    551  *         <td>ISO-2022-KR</td>
    552  *         <td>csISO2022KR </td>
    553  *     </tr>
    554  *     <tr>
    555  *         <td>ISO2022_CN_CNS</td>
    556  *         <td>?</td>
    557  *         <td></td>
    558  *     </tr>
    559  *     <tr>
    560  *         <td>ISO2022_CN_GB</td>
    561  *         <td>?</td>
    562  *         <td></td>
    563  *     </tr>
    564  *     <tr>
    565  *         <td>ISO8859_1</td>
    566  *         <td>ISO-8859-1</td>
    567  *         <td>ISO_8859-1:1987 iso-ir-100 ISO_8859-1 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 819 IBM-819 ISO8859-1 ISO_8859_1 </td>
    568  *     </tr>
    569  *     <tr>
    570  *         <td>ISO8859_13</td>
    571  *         <td>ISO-8859-13</td>
    572  *         <td></td>
    573  *     </tr>
    574  *     <tr>
    575  *         <td>ISO8859_15</td>
    576  *         <td>ISO-8859-15</td>
    577  *         <td>ISO_8859-15 Latin-9 8859_15 csISOlatin9 IBM923 cp923 923 L9 IBM-923 ISO8859-15 LATIN9 LATIN0 csISOlatin0 ISO8859_15_FDIS </td>
    578  *     </tr>
    579  *     <tr>
    580  *         <td>ISO8859_2</td>
    581  *         <td>ISO-8859-2</td>
    582  *         <td>ISO_8859-2:1987 iso-ir-101 ISO_8859-2 latin2 l2 csISOLatin2 8859_2 iso8859_2 </td>
    583  *     </tr>
    584  *     <tr>
    585  *         <td>ISO8859_3</td>
    586  *         <td>ISO-8859-3</td>
    587  *         <td>ISO_8859-3:1988 iso-ir-109 ISO_8859-3 latin3 l3 csISOLatin3 8859_3 </td>
    588  *     </tr>
    589  *     <tr>
    590  *         <td>ISO8859_4</td>
    591  *         <td>ISO-8859-4</td>
    592  *         <td>ISO_8859-4:1988 iso-ir-110 ISO_8859-4 latin4 l4 csISOLatin4 8859_4 </td>
    593  *     </tr>
    594  *     <tr>
    595  *         <td>ISO8859_5</td>
    596  *         <td>ISO-8859-5</td>
    597  *         <td>ISO_8859-5:1988 iso-ir-144 ISO_8859-5 cyrillic csISOLatinCyrillic 8859_5 </td>
    598  *     </tr>
    599  *     <tr>
    600  *         <td>ISO8859_6</td>
    601  *         <td>ISO-8859-6</td>
    602  *         <td>ISO_8859-6:1987 iso-ir-127 ISO_8859-6 ECMA-114 ASMO-708 arabic csISOLatinArabic 8859_6 </td>
    603  *     </tr>
    604  *     <tr>
    605  *         <td>ISO8859_7</td>
    606  *         <td>ISO-8859-7</td>
    607  *         <td>ISO_8859-7:1987 iso-ir-126 ISO_8859-7 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 sun_eu_greek </td>
    608  *     </tr>
    609  *     <tr>
    610  *         <td>ISO8859_8</td>
    611  *         <td>ISO-8859-8</td>
    612  *         <td>ISO_8859-8:1988 iso-ir-138 ISO_8859-8 hebrew csISOLatinHebrew 8859_8 </td>
    613  *     </tr>
    614  *     <tr>
    615  *         <td>ISO8859_9</td>
    616  *         <td>ISO-8859-9</td>
    617  *         <td>ISO_8859-9:1989 iso-ir-148 ISO_8859-9 latin5 l5 csISOLatin5 8859_9 </td>
    618  *     </tr>
    619  *     <tr>
    620  *         <td>JISAutoDetect</td>
    621  *         <td>?</td>
    622  *         <td></td>
    623  *     </tr>
    624  *     <tr>
    625  *         <td>JIS_C6626-1983</td>
    626  *         <td>JIS_C6626-1983</td>
    627  *         <td>x-JIS0208 JIS0208 csISO87JISX0208 x0208 JIS_X0208-1983 iso-ir-87 </td>
    628  *     </tr>
    629  *     <tr>
    630  *         <td>JIS_X0201</td>
    631  *         <td>JIS_X0201</td>
    632  *         <td>X0201 JIS0201 csHalfWidthKatakana </td>
    633  *     </tr>
    634  *     <tr>
    635  *         <td>JIS_X0212-1990</td>
    636  *         <td>JIS_X0212-1990</td>
    637  *         <td>iso-ir-159 x0212 JIS0212 csISO159JISX02121990 </td>
    638  *     </tr>
    639  *     <tr>
    640  *         <td>KOI8_R</td>
    641  *         <td>KOI8-R</td>
    642  *         <td>csKOI8R koi8 </td>
    643  *     </tr>
    644  *     <tr>
    645  *         <td>MS874</td>
    646  *         <td>windows-874</td>
    647  *         <td>cp874 </td>
    648  *     </tr>
    649  *     <tr>
    650  *         <td>MS932</td>
    651  *         <td>Windows-31J</td>
    652  *         <td>windows-932 csWindows31J x-ms-cp932 </td>
    653  *     </tr>
    654  *     <tr>
    655  *         <td>MS949</td>
    656  *         <td>windows-949</td>
    657  *         <td>windows949 ms_949 x-windows-949 </td>
    658  *     </tr>
    659  *     <tr>
    660  *         <td>MS950</td>
    661  *         <td>windows-950</td>
    662  *         <td>x-windows-950 </td>
    663  *     </tr>
    664  *     <tr>
    665  *         <td>MS950_HKSCS</td>
    666  *         <td></td>
    667  *         <td></td>
    668  *     </tr>
    669  *     <tr>
    670  *         <td>MacArabic</td>
    671  *         <td>?</td>
    672  *         <td></td>
    673  *     </tr>
    674  *     <tr>
    675  *         <td>MacCentralEurope</td>
    676  *         <td>?</td>
    677  *         <td></td>
    678  *     </tr>
    679  *     <tr>
    680  *         <td>MacCroatian</td>
    681  *         <td>?</td>
    682  *         <td></td>
    683  *     </tr>
    684  *     <tr>
    685  *         <td>MacCyrillic</td>
    686  *         <td>?</td>
    687  *         <td></td>
    688  *     </tr>
    689  *     <tr>
    690  *         <td>MacDingbat</td>
    691  *         <td>?</td>
    692  *         <td></td>
    693  *     </tr>
    694  *     <tr>
    695  *         <td>MacGreek</td>
    696  *         <td>MacGreek</td>
    697  *         <td></td>
    698  *     </tr>
    699  *     <tr>
    700  *         <td>MacHebrew</td>
    701  *         <td>?</td>
    702  *         <td></td>
    703  *     </tr>
    704  *     <tr>
    705  *         <td>MacIceland</td>
    706  *         <td>?</td>
    707  *         <td></td>
    708  *     </tr>
    709  *     <tr>
    710  *         <td>MacRoman</td>
    711  *         <td>MacRoman</td>
    712  *         <td>Macintosh MAC csMacintosh </td>
    713  *     </tr>
    714  *     <tr>
    715  *         <td>MacRomania</td>
    716  *         <td>?</td>
    717  *         <td></td>
    718  *     </tr>
    719  *     <tr>
    720  *         <td>MacSymbol</td>
    721  *         <td>?</td>
    722  *         <td></td>
    723  *     </tr>
    724  *     <tr>
    725  *         <td>MacThai</td>
    726  *         <td>?</td>
    727  *         <td></td>
    728  *     </tr>
    729  *     <tr>
    730  *         <td>MacTurkish</td>
    731  *         <td>?</td>
    732  *         <td></td>
    733  *     </tr>
    734  *     <tr>
    735  *         <td>MacUkraine</td>
    736  *         <td>?</td>
    737  *         <td></td>
    738  *     </tr>
    739  *     <tr>
    740  *         <td>SJIS</td>
    741  *         <td>Shift_JIS</td>
    742  *         <td>MS_Kanji csShiftJIS shift-jis x-sjis pck </td>
    743  *     </tr>
    744  *     <tr>
    745  *         <td>TIS620</td>
    746  *         <td>TIS-620</td>
    747  *         <td></td>
    748  *     </tr>
    749  *     <tr>
    750  *         <td>UTF-16</td>
    751  *         <td>UTF-16</td>
    752  *         <td>UTF_16 </td>
    753  *     </tr>
    754  *     <tr>
    755  *         <td>UTF8</td>
    756  *         <td>UTF-8</td>
    757  *         <td></td>
    758  *     </tr>
    759  *     <tr>
    760  *         <td>UnicodeBig</td>
    761  *         <td>?</td>
    762  *         <td></td>
    763  *     </tr>
    764  *     <tr>
    765  *         <td>UnicodeBigUnmarked</td>
    766  *         <td>UTF-16BE</td>
    767  *         <td>X-UTF-16BE UTF_16BE ISO-10646-UCS-2 </td>
    768  *     </tr>
    769  *     <tr>
    770  *         <td>UnicodeLittle</td>
    771  *         <td>?</td>
    772  *         <td></td>
    773  *     </tr>
    774  *     <tr>
    775  *         <td>UnicodeLittleUnmarked</td>
    776  *         <td>UTF-16LE</td>
    777  *         <td>UTF_16LE X-UTF-16LE </td>
    778  *     </tr>
    779  *     <tr>
    780  *         <td>x-Johab</td>
    781  *         <td>johab</td>
    782  *         <td>johab cp1361 ms1361 ksc5601-1992 ksc5601_1992 </td>
    783  *     </tr>
    784  *     <tr>
    785  *         <td>x-iso-8859-11</td>
    786  *         <td>?</td>
    787  *         <td></td>
    788  *     </tr>
    789  * </table>
    790  *
    791  *
    792  * @version $Id: CharsetUtil.java,v 1.1 2004/10/25 07:26:46 ntherning Exp $
    793  */
    794 public class CharsetUtil {
    795     private static Log log = LogFactory.getLog(CharsetUtil.class);
    796 
    797     private static class Charset implements Comparable<Charset> {
    798         private String canonical = null;
    799         private String mime = null;
    800         private String[] aliases = null;
    801 
    802         private Charset(String canonical, String mime, String[] aliases) {
    803             this.canonical = canonical;
    804             this.mime = mime;
    805             this.aliases = aliases;
    806         }
    807 
    808         public int compareTo(Charset c) {
    809             return this.canonical.compareTo(c.canonical);
    810         }
    811     }
    812 
    813     private static Charset[] JAVA_CHARSETS = {
    814         new Charset("ISO8859_1", "ISO-8859-1",
    815                     new String[] {"ISO_8859-1:1987", "iso-ir-100", "ISO_8859-1",
    816                                   "latin1", "l1", "IBM819", "CP819",
    817                                   "csISOLatin1", "8859_1", "819", "IBM-819",
    818                                   "ISO8859-1", "ISO_8859_1"}),
    819         new Charset("ISO8859_2", "ISO-8859-2",
    820                     new String[] {"ISO_8859-2:1987", "iso-ir-101", "ISO_8859-2",
    821                                   "latin2", "l2", "csISOLatin2", "8859_2",
    822                                   "iso8859_2"}),
    823         new Charset("ISO8859_3", "ISO-8859-3", new String[] {"ISO_8859-3:1988", "iso-ir-109", "ISO_8859-3", "latin3", "l3", "csISOLatin3", "8859_3"}),
    824         new Charset("ISO8859_4", "ISO-8859-4",
    825                     new String[] {"ISO_8859-4:1988", "iso-ir-110", "ISO_8859-4",
    826                                   "latin4", "l4", "csISOLatin4", "8859_4"}),
    827         new Charset("ISO8859_5", "ISO-8859-5",
    828                     new String[] {"ISO_8859-5:1988", "iso-ir-144", "ISO_8859-5",
    829                                   "cyrillic", "csISOLatinCyrillic", "8859_5"}),
    830         new Charset("ISO8859_6", "ISO-8859-6", new String[] {"ISO_8859-6:1987", "iso-ir-127", "ISO_8859-6", "ECMA-114", "ASMO-708", "arabic", "csISOLatinArabic", "8859_6"}),
    831         new Charset("ISO8859_7", "ISO-8859-7",
    832                     new String[] {"ISO_8859-7:1987", "iso-ir-126", "ISO_8859-7",
    833                                   "ELOT_928", "ECMA-118", "greek", "greek8",
    834                                   "csISOLatinGreek", "8859_7", "sun_eu_greek"}),
    835         new Charset("ISO8859_8", "ISO-8859-8", new String[] {"ISO_8859-8:1988", "iso-ir-138", "ISO_8859-8", "hebrew", "csISOLatinHebrew", "8859_8"}),
    836         new Charset("ISO8859_9", "ISO-8859-9",
    837                     new String[] {"ISO_8859-9:1989", "iso-ir-148", "ISO_8859-9",
    838                                   "latin5", "l5", "csISOLatin5", "8859_9"}),
    839 
    840         new Charset("ISO8859_13", "ISO-8859-13", new String[] {}),
    841         new Charset("ISO8859_15", "ISO-8859-15",
    842                     new String[] {"ISO_8859-15", "Latin-9", "8859_15",
    843                                   "csISOlatin9", "IBM923", "cp923", "923", "L9",
    844                                   "IBM-923", "ISO8859-15", "LATIN9", "LATIN0",
    845                                   "csISOlatin0", "ISO8859_15_FDIS"}),
    846         new Charset("KOI8_R", "KOI8-R", new String[] {"csKOI8R", "koi8"}),
    847         new Charset("ASCII", "US-ASCII",
    848                     new String[] {"ANSI_X3.4-1968", "iso-ir-6",
    849                                   "ANSI_X3.4-1986", "ISO_646.irv:1991",
    850                                   "ISO646-US", "us", "IBM367", "cp367",
    851                                   "csASCII", "ascii7", "646", "iso_646.irv:1983"}),
    852         new Charset("UTF8", "UTF-8", new String[] {}),
    853         new Charset("UTF-16", "UTF-16", new String[] {"UTF_16"}),
    854         new Charset("UnicodeBigUnmarked", "UTF-16BE", new String[] {"X-UTF-16BE", "UTF_16BE", "ISO-10646-UCS-2"}),
    855         new Charset("UnicodeLittleUnmarked", "UTF-16LE", new String[] {"UTF_16LE", "X-UTF-16LE"}),
    856         new Charset("Big5", "Big5", new String[] {"csBig5", "CN-Big5", "BIG-FIVE", "BIGFIVE"}),
    857         new Charset("Big5_HKSCS", "Big5-HKSCS", new String[] {"big5hkscs"}),
    858         new Charset("EUC_JP", "EUC-JP",
    859                     new String[] {"csEUCPkdFmtJapanese",
    860                               "Extended_UNIX_Code_Packed_Format_for_Japanese",
    861                               "eucjis", "x-eucjp", "eucjp", "x-euc-jp"}),
    862         new Charset("EUC_KR", "EUC-KR",
    863                     new String[] {"csEUCKR", "ksc5601", "5601", "ksc5601_1987",
    864                                   "ksc_5601", "ksc5601-1987", "ks_c_5601-1987",
    865                                   "euckr"}),
    866         new Charset("GB18030", "GB18030", new String[] {"gb18030-2000"}),
    867         new Charset("EUC_CN", "GB2312", new String[] {"x-EUC-CN", "csGB2312", "euccn", "euc-cn", "gb2312-80", "gb2312-1980", "CN-GB", "CN-GB-ISOIR165"}),
    868         new Charset("GBK", "windows-936", new String[] {"CP936", "MS936", "ms_936", "x-mswin-936"}),
    869 
    870         new Charset("Cp037", "IBM037", new String[] {"ebcdic-cp-us", "ebcdic-cp-ca", "ebcdic-cp-wt", "ebcdic-cp-nl", "csIBM037"}),
    871         new Charset("Cp273", "IBM273", new String[] {"csIBM273"}),
    872         new Charset("Cp277", "IBM277", new String[] {"EBCDIC-CP-DK", "EBCDIC-CP-NO", "csIBM277"}),
    873         new Charset("Cp278", "IBM278", new String[] {"CP278", "ebcdic-cp-fi", "ebcdic-cp-se", "csIBM278"}),
    874         new Charset("Cp280", "IBM280", new String[] {"ebcdic-cp-it", "csIBM280"}),
    875         new Charset("Cp284", "IBM284", new String[] {"ebcdic-cp-es", "csIBM284"}),
    876         new Charset("Cp285", "IBM285", new String[] {"ebcdic-cp-gb", "csIBM285"}),
    877         new Charset("Cp297", "IBM297", new String[] {"ebcdic-cp-fr", "csIBM297"}),
    878         new Charset("Cp420", "IBM420", new String[] {"ebcdic-cp-ar1", "csIBM420"}),
    879         new Charset("Cp424", "IBM424", new String[] {"ebcdic-cp-he", "csIBM424"}),
    880         new Charset("Cp437", "IBM437", new String[] {"437", "csPC8CodePage437"}),
    881         new Charset("Cp500", "IBM500", new String[] {"ebcdic-cp-be", "ebcdic-cp-ch", "csIBM500"}),
    882         new Charset("Cp775", "IBM775", new String[] {"csPC775Baltic"}),
    883         new Charset("Cp838", "IBM-Thai", new String[] {}),
    884         new Charset("Cp850", "IBM850", new String[] {"850", "csPC850Multilingual"}),
    885         new Charset("Cp852", "IBM852", new String[] {"852", "csPCp852"}),
    886         new Charset("Cp855", "IBM855", new String[] {"855", "csIBM855"}),
    887         new Charset("Cp857", "IBM857", new String[] {"857", "csIBM857"}),
    888         new Charset("Cp858", "IBM00858",
    889                 new String[] {"CCSID00858", "CP00858",
    890                               "PC-Multilingual-850+euro"}),
    891         new Charset("Cp860", "IBM860", new String[] {"860", "csIBM860"}),
    892         new Charset("Cp861", "IBM861", new String[] {"861", "cp-is", "csIBM861"}),
    893         new Charset("Cp862", "IBM862", new String[] {"862", "csPC862LatinHebrew"}),
    894         new Charset("Cp863", "IBM863", new String[] {"863", "csIBM863"}),
    895         new Charset("Cp864", "IBM864", new String[] {"cp864", "csIBM864"}),
    896         new Charset("Cp865", "IBM865", new String[] {"865", "csIBM865"}),
    897         new Charset("Cp866", "IBM866", new String[] {"866", "csIBM866"}),
    898         new Charset("Cp868", "IBM868", new String[] {"cp-ar", "csIBM868"}),
    899         new Charset("Cp869", "IBM869", new String[] {"cp-gr", "csIBM869"}),
    900         new Charset("Cp870", "IBM870", new String[] {"ebcdic-cp-roece", "ebcdic-cp-yu", "csIBM870"}),
    901         new Charset("Cp871", "IBM871", new String[] {"ebcdic-cp-is", "csIBM871"}),
    902         new Charset("Cp918", "IBM918", new String[] {"ebcdic-cp-ar2", "csIBM918"}),
    903         new Charset("Cp1026", "IBM1026", new String[] {"csIBM1026"}),
    904         new Charset("Cp1047", "IBM1047", new String[] {"IBM-1047"}),
    905         new Charset("Cp1140", "IBM01140",
    906                     new String[] {"CCSID01140", "CP01140",
    907                                   "ebcdic-us-37+euro"}),
    908         new Charset("Cp1141", "IBM01141",
    909                     new String[] {"CCSID01141", "CP01141",
    910                                   "ebcdic-de-273+euro"}),
    911         new Charset("Cp1142", "IBM01142", new String[] {"CCSID01142", "CP01142", "ebcdic-dk-277+euro", "ebcdic-no-277+euro"}),
    912         new Charset("Cp1143", "IBM01143", new String[] {"CCSID01143", "CP01143", "ebcdic-fi-278+euro", "ebcdic-se-278+euro"}),
    913         new Charset("Cp1144", "IBM01144", new String[] {"CCSID01144", "CP01144", "ebcdic-it-280+euro"}),
    914         new Charset("Cp1145", "IBM01145", new String[] {"CCSID01145", "CP01145", "ebcdic-es-284+euro"}),
    915         new Charset("Cp1146", "IBM01146", new String[] {"CCSID01146", "CP01146", "ebcdic-gb-285+euro"}),
    916         new Charset("Cp1147", "IBM01147", new String[] {"CCSID01147", "CP01147", "ebcdic-fr-297+euro"}),
    917         new Charset("Cp1148", "IBM01148", new String[] {"CCSID01148", "CP01148", "ebcdic-international-500+euro"}),
    918         new Charset("Cp1149", "IBM01149", new String[] {"CCSID01149", "CP01149", "ebcdic-is-871+euro"}),
    919         new Charset("Cp1250", "windows-1250", new String[] {}),
    920         new Charset("Cp1251", "windows-1251", new String[] {}),
    921         new Charset("Cp1252", "windows-1252", new String[] {}),
    922         new Charset("Cp1253", "windows-1253", new String[] {}),
    923         new Charset("Cp1254", "windows-1254", new String[] {}),
    924         new Charset("Cp1255", "windows-1255", new String[] {}),
    925         new Charset("Cp1256", "windows-1256", new String[] {}),
    926         new Charset("Cp1257", "windows-1257", new String[] {}),
    927         new Charset("Cp1258", "windows-1258", new String[] {}),
    928         new Charset("ISO2022CN", "ISO-2022-CN", new String[] {}),
    929         new Charset("ISO2022JP", "ISO-2022-JP", new String[] {"csISO2022JP", "JIS", "jis_encoding", "csjisencoding"}),
    930         new Charset("ISO2022KR", "ISO-2022-KR", new String[] {"csISO2022KR"}),
    931         new Charset("JIS_X0201", "JIS_X0201", new String[] {"X0201", "JIS0201", "csHalfWidthKatakana"}),
    932         new Charset("JIS_X0212-1990", "JIS_X0212-1990", new String[] {"iso-ir-159", "x0212", "JIS0212", "csISO159JISX02121990"}),
    933         new Charset("JIS_C6626-1983", "JIS_C6626-1983", new String[] {"x-JIS0208", "JIS0208", "csISO87JISX0208", "x0208", "JIS_X0208-1983", "iso-ir-87"}),
    934         new Charset("SJIS", "Shift_JIS", new String[] {"MS_Kanji", "csShiftJIS", "shift-jis", "x-sjis", "pck"}),
    935         new Charset("TIS620", "TIS-620", new String[] {}),
    936         new Charset("MS932", "Windows-31J", new String[] {"windows-932", "csWindows31J", "x-ms-cp932"}),
    937         new Charset("EUC_TW", "EUC-TW", new String[] {"x-EUC-TW", "cns11643", "euctw"}),
    938         new Charset("x-Johab", "johab", new String[] {"johab", "cp1361", "ms1361", "ksc5601-1992", "ksc5601_1992"}),
    939         new Charset("MS950_HKSCS", "", new String[] {}),
    940         new Charset("MS874", "windows-874", new String[] {"cp874"}),
    941         new Charset("MS949", "windows-949", new String[] {"windows949", "ms_949", "x-windows-949"}),
    942         new Charset("MS950", "windows-950", new String[] {"x-windows-950"}),
    943 
    944         new Charset("Cp737", null, new String[] {}),
    945         new Charset("Cp856", null, new String[] {}),
    946         new Charset("Cp875", null, new String[] {}),
    947         new Charset("Cp921", null, new String[] {}),
    948         new Charset("Cp922", null, new String[] {}),
    949         new Charset("Cp930", null, new String[] {}),
    950         new Charset("Cp933", null, new String[] {}),
    951         new Charset("Cp935", null, new String[] {}),
    952         new Charset("Cp937", null, new String[] {}),
    953         new Charset("Cp939", null, new String[] {}),
    954         new Charset("Cp942", null, new String[] {}),
    955         new Charset("Cp942C", null, new String[] {}),
    956         new Charset("Cp943", null, new String[] {}),
    957         new Charset("Cp943C", null, new String[] {}),
    958         new Charset("Cp948", null, new String[] {}),
    959         new Charset("Cp949", null, new String[] {}),
    960         new Charset("Cp949C", null, new String[] {}),
    961         new Charset("Cp950", null, new String[] {}),
    962         new Charset("Cp964", null, new String[] {}),
    963         new Charset("Cp970", null, new String[] {}),
    964         new Charset("Cp1006", null, new String[] {}),
    965         new Charset("Cp1025", null, new String[] {}),
    966         new Charset("Cp1046", null, new String[] {}),
    967         new Charset("Cp1097", null, new String[] {}),
    968         new Charset("Cp1098", null, new String[] {}),
    969         new Charset("Cp1112", null, new String[] {}),
    970         new Charset("Cp1122", null, new String[] {}),
    971         new Charset("Cp1123", null, new String[] {}),
    972         new Charset("Cp1124", null, new String[] {}),
    973         new Charset("Cp1381", null, new String[] {}),
    974         new Charset("Cp1383", null, new String[] {}),
    975         new Charset("Cp33722", null, new String[] {}),
    976         new Charset("Big5_Solaris", null, new String[] {}),
    977         new Charset("EUC_JP_LINUX", null, new String[] {}),
    978         new Charset("EUC_JP_Solaris", null, new String[] {}),
    979         new Charset("ISCII91", null, new String[] {"x-ISCII91", "iscii"}),
    980         new Charset("ISO2022_CN_CNS", null, new String[] {}),
    981         new Charset("ISO2022_CN_GB", null, new String[] {}),
    982         new Charset("x-iso-8859-11", null, new String[] {}),
    983         new Charset("JISAutoDetect", null, new String[] {}),
    984         new Charset("MacArabic", null, new String[] {}),
    985         new Charset("MacCentralEurope", null, new String[] {}),
    986         new Charset("MacCroatian", null, new String[] {}),
    987         new Charset("MacCyrillic", null, new String[] {}),
    988         new Charset("MacDingbat", null, new String[] {}),
    989         new Charset("MacGreek", "MacGreek", new String[] {}),
    990         new Charset("MacHebrew", null, new String[] {}),
    991         new Charset("MacIceland", null, new String[] {}),
    992         new Charset("MacRoman", "MacRoman", new String[] {"Macintosh", "MAC", "csMacintosh"}),
    993         new Charset("MacRomania", null, new String[] {}),
    994         new Charset("MacSymbol", null, new String[] {}),
    995         new Charset("MacThai", null, new String[] {}),
    996         new Charset("MacTurkish", null, new String[] {}),
    997         new Charset("MacUkraine", null, new String[] {}),
    998         new Charset("UnicodeBig", null, new String[] {}),
    999         new Charset("UnicodeLittle", null, new String[] {})
   1000     };
   1001 
    /**
     * Contains the canonical names of character sets which can be used to
     * decode bytes into Java chars. The names are stored lower-cased (using
     * <code>Locale.US</code>) and the set is filled in by the static
     * initializer of this class.
     */
    private static TreeSet<String> decodingSupported = null;

    /**
     * Contains the canonical names of character sets which can be used to
     * encode Java chars into bytes. The names are stored lower-cased (using
     * <code>Locale.US</code>) and the set is filled in by the static
     * initializer of this class.
     */
    private static TreeSet<String> encodingSupported = null;

    /**
     * Maps character set names to Charset objects. All possible names of
     * a charset will be mapped to the Charset. Keys are lower-cased (using
     * <code>Locale.US</code>), so lookups must lower-case the name the same
     * way.
     */
    private static HashMap<String, Charset> charsetMap = null;
   1019 
   1020     static {
   1021         decodingSupported = new TreeSet<String>();
   1022         encodingSupported = new TreeSet<String>();
   1023         byte[] dummy = new byte[] {'d', 'u', 'm', 'm', 'y'};
   1024         for (int i = 0; i < JAVA_CHARSETS.length; i++) {
   1025             try {
   1026                 String s = new String(dummy, JAVA_CHARSETS[i].canonical);
   1027                 decodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase(Locale.US));
   1028             } catch (UnsupportedOperationException e) {
   1029             } catch (UnsupportedEncodingException e) {
   1030             }
   1031             try {
   1032                 "dummy".getBytes(JAVA_CHARSETS[i].canonical);
   1033                 encodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase(Locale.US));
   1034             } catch (UnsupportedOperationException e) {
   1035             } catch (UnsupportedEncodingException e) {
   1036             }
   1037         }
   1038 
   1039         charsetMap = new HashMap<String, Charset>();
   1040         for (int i = 0; i < JAVA_CHARSETS.length; i++) {
   1041             Charset c = JAVA_CHARSETS[i];
   1042             charsetMap.put(c.canonical.toLowerCase(Locale.US), c);
   1043             if (c.mime != null) {
   1044                 charsetMap.put(c.mime.toLowerCase(Locale.US), c);
   1045             }
   1046             if (c.aliases != null) {
   1047                 for (int j = 0; j < c.aliases.length; j++) {
   1048                     charsetMap.put(c.aliases[j].toLowerCase(Locale.US), c);
   1049                 }
   1050             }
   1051         }
   1052 
   1053         if (log.isDebugEnabled()) {
   1054             log.debug("Character sets which support decoding: "
   1055                         + decodingSupported);
   1056             log.debug("Character sets which support encoding: "
   1057                         + encodingSupported);
   1058         }
   1059     }
   1060 
    /**
     * ANDROID:  THE FOLLOWING STATIC CONSTANTS ARE COPIED FROM A NEWER VERSION OF MIME4J
     */

    /** carriage return - line feed sequence */
    public static final String CRLF = "\r\n";

    /** US-ASCII CR, carriage return (13) */
    public static final int CR = '\r';

    /** US-ASCII LF, line feed (10) */
    public static final int LF = '\n';

    /** US-ASCII SP, space (32) */
    public static final int SP = ' ';

    /** US-ASCII HT, horizontal-tab (9)*/
    public static final int HT = '\t';

    /** The <code>java.nio</code> charset obtained for the name "US-ASCII". */
    public static final java.nio.charset.Charset US_ASCII = java.nio.charset.Charset
            .forName("US-ASCII");

    /** The <code>java.nio</code> charset obtained for the name "ISO-8859-1". */
    public static final java.nio.charset.Charset ISO_8859_1 = java.nio.charset.Charset
            .forName("ISO-8859-1");

    /** The <code>java.nio</code> charset obtained for the name "UTF-8". */
    public static final java.nio.charset.Charset UTF_8 = java.nio.charset.Charset
            .forName("UTF-8");
   1088 
   1089     /**
   1090      * Returns <code>true</code> if the specified character is a whitespace
   1091      * character (CR, LF, SP or HT).
   1092      *
   1093      * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
   1094      *
   1095      * @param ch
   1096      *            character to test.
   1097      * @return <code>true</code> if the specified character is a whitespace
   1098      *         character, <code>false</code> otherwise.
   1099      */
   1100     public static boolean isWhitespace(char ch) {
   1101         return ch == SP || ch == HT || ch == CR || ch == LF;
   1102     }
   1103 
   1104     /**
   1105      * Returns <code>true</code> if the specified string consists entirely of
   1106      * whitespace characters.
   1107      *
   1108      * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
   1109      *
   1110      * @param s
   1111      *            string to test.
   1112      * @return <code>true</code> if the specified string consists entirely of
   1113      *         whitespace characters, <code>false</code> otherwise.
   1114      */
   1115     public static boolean isWhitespace(final String s) {
   1116         if (s == null) {
   1117             throw new IllegalArgumentException("String may not be null");
   1118         }
   1119         final int len = s.length();
   1120         for (int i = 0; i < len; i++) {
   1121             if (!isWhitespace(s.charAt(i))) {
   1122                 return false;
   1123             }
   1124         }
   1125         return true;
   1126     }
   1127 
   1128     /**
   1129      * Determines if the VM supports encoding (chars to bytes) the
   1130      * specified character set. NOTE: the given character set name may
   1131      * not be known to the VM even if this method returns <code>true</code>.
   1132      * Use {@link #toJavaCharset(String)} to get the canonical Java character
   1133      * set name.
   1134      *
   1135      * @param charsetName the characters set name.
   1136      * @return <code>true</code> if encoding is supported, <code>false</code>
   1137      *         otherwise.
   1138      */
   1139     public static boolean isEncodingSupported(String charsetName) {
   1140         return encodingSupported.contains(charsetName.toLowerCase(Locale.US));
   1141     }
   1142 
   1143     /**
   1144      * Determines if the VM supports decoding (bytes to chars) the
   1145      * specified character set. NOTE: the given character set name may
   1146      * not be known to the VM even if this method returns <code>true</code>.
   1147      * Use {@link #toJavaCharset(String)} to get the canonical Java character
   1148      * set name.
   1149      *
   1150      * @param charsetName the characters set name.
   1151      * @return <code>true</code> if decoding is supported, <code>false</code>
   1152      *         otherwise.
   1153      */
   1154     public static boolean isDecodingSupported(String charsetName) {
   1155         return decodingSupported.contains(charsetName.toLowerCase(Locale.US));
   1156     }
   1157 
   1158     /**
   1159      * Gets the preferred MIME character set name for the specified
   1160      * character set or <code>null</code> if not known.
   1161      *
   1162      * @param charsetName the character set name to look for.
   1163      * @return the MIME preferred name or <code>null</code> if not known.
   1164      */
   1165     public static String toMimeCharset(String charsetName) {
   1166         Charset c = charsetMap.get(charsetName.toLowerCase(Locale.US));
   1167         if (c != null) {
   1168             return c.mime;
   1169         }
   1170         return null;
   1171     }
   1172 
   1173     /**
   1174      * Gets the canonical Java character set name for the specified
   1175      * character set or <code>null</code> if not known. This should be
   1176      * called before doing any conversions using the Java API. NOTE:
   1177      * you must use {@link #isEncodingSupported(String)} or
   1178      * {@link #isDecodingSupported(String)} to make sure the returned
   1179      * Java character set is supported by the current VM.
   1180      *
   1181      * @param charsetName the character set name to look for.
   1182      * @return the canonical Java name or <code>null</code> if not known.
   1183      */
   1184     public static String toJavaCharset(String charsetName) {
   1185         Charset c = charsetMap.get(charsetName.toLowerCase(Locale.US));
   1186         if (c != null) {
   1187             return c.canonical;
   1188         }
   1189         return null;
   1190     }
   1191 
   1192     public static java.nio.charset.Charset getCharset(String charsetName) {
   1193         String defaultCharset = "ISO-8859-1";
   1194 
   1195         // Use the default chareset if given charset is null
   1196         if(charsetName == null) charsetName = defaultCharset;
   1197 
   1198         try {
   1199             return java.nio.charset.Charset.forName(charsetName);
   1200         } catch (IllegalCharsetNameException e) {
   1201             log.info("Illegal charset " + charsetName + ", fallback to " +
   1202                     defaultCharset + ": " + e);
   1203             // Use default charset on exception
   1204             return java.nio.charset.Charset.forName(defaultCharset);
   1205         } catch (UnsupportedCharsetException ex) {
   1206             log.info("Unsupported charset " + charsetName + ", fallback to " +
   1207                     defaultCharset + ": " + ex);
   1208             // Use default charset on exception
   1209             return java.nio.charset.Charset.forName(defaultCharset);
   1210         }
   1211 
   1212     }
   1213     /*
   1214      * Uncomment the code below and run the main method to regenerate the
   1215      * Javadoc table above when the known charsets change.
   1216      */
   1217 
   1218     /*
   1219     private static String dumpHtmlTable() {
   1220         LinkedList l = new LinkedList(Arrays.asList(JAVA_CHARSETS));
   1221         Collections.sort(l);
   1222         StringBuffer sb = new StringBuffer();
   1223         sb.append(" * <table>\n");
   1224         sb.append(" *     <tr>\n");
   1225         sb.append(" *         <td>Canonical (Java) name</td>\n");
   1226         sb.append(" *         <td>MIME preferred</td>\n");
   1227         sb.append(" *         <td>Aliases</td>\n");
   1228         sb.append(" *     </tr>\n");
   1229 
   1230         for (Iterator it = l.iterator(); it.hasNext();) {
   1231             Charset c = (Charset) it.next();
   1232             sb.append(" *     <tr>\n");
   1233             sb.append(" *         <td>" + c.canonical + "</td>\n");
   1234             sb.append(" *         <td>" + (c.mime == null ? "?" : c.mime)+ "</td>\n");
   1235             sb.append(" *         <td>");
   1236             for (int i = 0; c.aliases != null && i < c.aliases.length; i++) {
   1237                 sb.append(c.aliases[i] + " ");
   1238             }
   1239             sb.append("</td>\n");
   1240             sb.append(" *     </tr>\n");
   1241         }
   1242         sb.append(" * </table>\n");
   1243         return sb.toString();
   1244     }
   1245 
   1246     public static void main(String[] args) {
   1247         System.out.println(dumpHtmlTable());
   1248     }*/
   1249 }
   1250