1 /**************************************************************** 2 * Licensed to the Apache Software Foundation (ASF) under one * 3 * or more contributor license agreements. See the NOTICE file * 4 * distributed with this work for additional information * 5 * regarding copyright ownership. The ASF licenses this file * 6 * to you under the Apache License, Version 2.0 (the * 7 * "License"); you may not use this file except in compliance * 8 * with the License. You may obtain a copy of the License at * 9 * * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, * 13 * software distributed under the License is distributed on an * 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 15 * KIND, either express or implied. See the License for the * 16 * specific language governing permissions and limitations * 17 * under the License. * 18 ****************************************************************/ 19 20 package org.apache.james.mime4j.util; 21 22 import java.io.UnsupportedEncodingException; 23 import java.nio.charset.IllegalCharsetNameException; 24 import java.nio.charset.UnsupportedCharsetException; 25 import java.util.HashMap; 26 import java.util.TreeSet; 27 28 //BEGIN android-changed: Stubbing out logging 29 import org.apache.james.mime4j.Log; 30 import org.apache.james.mime4j.LogFactory; 31 //END android-changed 32 33 /** 34 * Utility class for working with character sets. It is somewhat similar to 35 * the Java 1.4 <code>java.nio.charset.Charset</code> class but knows many 36 * more aliases and is compatible with Java 1.3. It will use a simple detection 37 * mechanism to detect what character sets the current VM supports. This will 38 * be a sub-set of the character sets listed in the 39 * <a href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"> 40 * Java 1.5 (J2SE5.0) Supported Encodings</a> document. 41 * <p> 42 * The <a href="http://www.iana.org/assignments/character-sets"> 43 * IANA Character Sets</a> document has been used to determine the preferred 44 * MIME character set names and to get a list of known aliases. 45 * <p> 46 * This is a complete list of the character sets known to this class: 47 * <table> 48 * <tr> 49 * <td>Canonical (Java) name</td> 50 * <td>MIME preferred</td> 51 * <td>Aliases</td> 52 * </tr> 53 * <tr> 54 * <td>ASCII</td> 55 * <td>US-ASCII</td> 56 * <td>ANSI_X3.4-1968 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ISO646-US us IBM367 cp367 csASCII ascii7 646 iso_646.irv:1983 </td> 57 * </tr> 58 * <tr> 59 * <td>Big5</td> 60 * <td>Big5</td> 61 * <td>csBig5 CN-Big5 BIG-FIVE BIGFIVE </td> 62 * </tr> 63 * <tr> 64 * <td>Big5_HKSCS</td> 65 * <td>Big5-HKSCS</td> 66 * <td>big5hkscs </td> 67 * </tr> 68 * <tr> 69 * <td>Big5_Solaris</td> 70 * <td>?</td> 71 * <td></td> 72 * </tr> 73 * <tr> 74 * <td>Cp037</td> 75 * <td>IBM037</td> 76 * <td>ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 </td> 77 * </tr> 78 * <tr> 79 * <td>Cp1006</td> 80 * <td>?</td> 81 * <td></td> 82 * </tr> 83 * <tr> 84 * <td>Cp1025</td> 85 * <td>?</td> 86 * <td></td> 87 * </tr> 88 * <tr> 89 * <td>Cp1026</td> 90 * <td>IBM1026</td> 91 * <td>csIBM1026 </td> 92 * </tr> 93 * <tr> 94 * <td>Cp1046</td> 95 * <td>?</td> 96 * <td></td> 97 * </tr> 98 * <tr> 99 * <td>Cp1047</td> 100 * <td>IBM1047</td> 101 * <td>IBM-1047 </td> 102 * </tr> 103 * <tr> 104 * <td>Cp1097</td> 105 * <td>?</td> 106 * <td></td> 107 * </tr> 108 * <tr> 109 * <td>Cp1098</td> 110 * <td>?</td> 111 * <td></td> 112 * </tr> 113 * <tr> 114 * <td>Cp1112</td> 115 * <td>?</td> 116 * <td></td> 117 * </tr> 118 * <tr> 119 * <td>Cp1122</td> 120 * <td>?</td> 121 * <td></td> 122 * </tr> 123 * <tr> 124 * <td>Cp1123</td> 125 * <td>?</td> 126 * <td></td> 127 * </tr> 128 * <tr> 129 * <td>Cp1124</td> 130 * <td>?</td> 131 * <td></td> 132 * </tr> 133 * <tr> 134 * <td>Cp1140</td> 135 * <td>IBM01140</td> 136 * <td>CCSID01140 CP01140 ebcdic-us-37+euro </td> 137 * </tr> 138 * <tr> 139 * <td>Cp1141</td> 140 * <td>IBM01141</td> 141 * <td>CCSID01141 CP01141 ebcdic-de-273+euro </td> 142 * </tr> 143 * <tr> 144 * <td>Cp1142</td> 145 * <td>IBM01142</td> 146 * <td>CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro </td> 147 * </tr> 148 * <tr> 149 * <td>Cp1143</td> 150 * <td>IBM01143</td> 151 * <td>CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro </td> 152 * </tr> 153 * <tr> 154 * <td>Cp1144</td> 155 * <td>IBM01144</td> 156 * <td>CCSID01144 CP01144 ebcdic-it-280+euro </td> 157 * </tr> 158 * <tr> 159 * <td>Cp1145</td> 160 * <td>IBM01145</td> 161 * <td>CCSID01145 CP01145 ebcdic-es-284+euro </td> 162 * </tr> 163 * <tr> 164 * <td>Cp1146</td> 165 * <td>IBM01146</td> 166 * <td>CCSID01146 CP01146 ebcdic-gb-285+euro </td> 167 * </tr> 168 * <tr> 169 * <td>Cp1147</td> 170 * <td>IBM01147</td> 171 * <td>CCSID01147 CP01147 ebcdic-fr-297+euro </td> 172 * </tr> 173 * <tr> 174 * <td>Cp1148</td> 175 * <td>IBM01148</td> 176 * <td>CCSID01148 CP01148 ebcdic-international-500+euro </td> 177 * </tr> 178 * <tr> 179 * <td>Cp1149</td> 180 * <td>IBM01149</td> 181 * <td>CCSID01149 CP01149 ebcdic-is-871+euro </td> 182 * </tr> 183 * <tr> 184 * <td>Cp1250</td> 185 * <td>windows-1250</td> 186 * <td></td> 187 * </tr> 188 * <tr> 189 * <td>Cp1251</td> 190 * <td>windows-1251</td> 191 * <td></td> 192 * </tr> 193 * <tr> 194 * <td>Cp1252</td> 195 * <td>windows-1252</td> 196 * <td></td> 197 * </tr> 198 * <tr> 199 * <td>Cp1253</td> 200 * <td>windows-1253</td> 201 * <td></td> 202 * </tr> 203 * <tr> 204 * <td>Cp1254</td> 205 * <td>windows-1254</td> 206 * <td></td> 207 * </tr> 208 * <tr> 209 * <td>Cp1255</td> 210 * <td>windows-1255</td> 211 * <td></td> 212 * </tr> 213 * <tr> 214 * <td>Cp1256</td> 215 * <td>windows-1256</td> 216 * <td></td> 217 * </tr> 218 * <tr> 219 * <td>Cp1257</td> 220 * <td>windows-1257</td> 221 * <td></td> 222 * </tr> 223 * <tr> 224 * <td>Cp1258</td> 225 * <td>windows-1258</td> 226 * <td></td> 227 * </tr> 228 * <tr> 229 * <td>Cp1381</td> 230 * <td>?</td> 231 * <td></td> 232 * </tr> 233 * <tr> 234 * <td>Cp1383</td> 235 * <td>?</td> 236 * <td></td> 237 * </tr> 238 * <tr> 239 * <td>Cp273</td> 240 * <td>IBM273</td> 241 * <td>csIBM273 </td> 242 * </tr> 243 * <tr> 244 * <td>Cp277</td> 245 * <td>IBM277</td> 246 * <td>EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 </td> 247 * </tr> 248 * <tr> 249 * <td>Cp278</td> 250 * <td>IBM278</td> 251 * <td>CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278 </td> 252 * </tr> 253 * <tr> 254 * <td>Cp280</td> 255 * <td>IBM280</td> 256 * <td>ebcdic-cp-it csIBM280 </td> 257 * </tr> 258 * <tr> 259 * <td>Cp284</td> 260 * <td>IBM284</td> 261 * <td>ebcdic-cp-es csIBM284 </td> 262 * </tr> 263 * <tr> 264 * <td>Cp285</td> 265 * <td>IBM285</td> 266 * <td>ebcdic-cp-gb csIBM285 </td> 267 * </tr> 268 * <tr> 269 * <td>Cp297</td> 270 * <td>IBM297</td> 271 * <td>ebcdic-cp-fr csIBM297 </td> 272 * </tr> 273 * <tr> 274 * <td>Cp33722</td> 275 * <td>?</td> 276 * <td></td> 277 * </tr> 278 * <tr> 279 * <td>Cp420</td> 280 * <td>IBM420</td> 281 * <td>ebcdic-cp-ar1 csIBM420 </td> 282 * </tr> 283 * <tr> 284 * <td>Cp424</td> 285 * <td>IBM424</td> 286 * <td>ebcdic-cp-he csIBM424 </td> 287 * </tr> 288 * <tr> 289 * <td>Cp437</td> 290 * <td>IBM437</td> 291 * <td>437 csPC8CodePage437 </td> 292 * </tr> 293 * <tr> 294 * <td>Cp500</td> 295 * <td>IBM500</td> 296 * <td>ebcdic-cp-be ebcdic-cp-ch csIBM500 </td> 297 * </tr> 298 * <tr> 299 * <td>Cp737</td> 300 * <td>?</td> 301 * <td></td> 302 * </tr> 303 * <tr> 304 * <td>Cp775</td> 305 * <td>IBM775</td> 306 * <td>csPC775Baltic </td> 307 * </tr> 308 * <tr> 309 * <td>Cp838</td> 310 * <td>IBM-Thai</td> 311 * <td></td> 312 * </tr> 313 * <tr> 314 * <td>Cp850</td> 315 * <td>IBM850</td> 316 * <td>850 csPC850Multilingual </td> 317 * </tr> 318 * <tr> 319 * <td>Cp852</td> 320 * <td>IBM852</td> 321 * <td>852 csPCp852 </td> 322 * </tr> 323 * <tr> 324 * <td>Cp855</td> 325 * <td>IBM855</td> 326 * <td>855 csIBM855 </td> 327 * </tr> 328 * <tr> 329 * <td>Cp856</td> 330 * <td>?</td> 331 * <td></td> 332 * </tr> 333 * <tr> 334 * <td>Cp857</td> 335 * <td>IBM857</td> 336 * <td>857 csIBM857 </td> 337 * </tr> 338 * <tr> 339 * <td>Cp858</td> 340 * <td>IBM00858</td> 341 * <td>CCSID00858 CP00858 PC-Multilingual-850+euro </td> 342 * </tr> 343 * <tr> 344 * <td>Cp860</td> 345 * <td>IBM860</td> 346 * <td>860 csIBM860 </td> 347 * </tr> 348 * <tr> 349 * <td>Cp861</td> 350 * <td>IBM861</td> 351 * <td>861 cp-is csIBM861 </td> 352 * </tr> 353 * <tr> 354 * <td>Cp862</td> 355 * <td>IBM862</td> 356 * <td>862 csPC862LatinHebrew </td> 357 * </tr> 358 * <tr> 359 * <td>Cp863</td> 360 * <td>IBM863</td> 361 * <td>863 csIBM863 </td> 362 * </tr> 363 * <tr> 364 * <td>Cp864</td> 365 * <td>IBM864</td> 366 * <td>cp864 csIBM864 </td> 367 * </tr> 368 * <tr> 369 * <td>Cp865</td> 370 * <td>IBM865</td> 371 * <td>865 csIBM865 </td> 372 * </tr> 373 * <tr> 374 * <td>Cp866</td> 375 * <td>IBM866</td> 376 * <td>866 csIBM866 </td> 377 * </tr> 378 * <tr> 379 * <td>Cp868</td> 380 * <td>IBM868</td> 381 * <td>cp-ar csIBM868 </td> 382 * </tr> 383 * <tr> 384 * <td>Cp869</td> 385 * <td>IBM869</td> 386 * <td>cp-gr csIBM869 </td> 387 * </tr> 388 * <tr> 389 * <td>Cp870</td> 390 * <td>IBM870</td> 391 * <td>ebcdic-cp-roece ebcdic-cp-yu csIBM870 </td> 392 * </tr> 393 * <tr> 394 * <td>Cp871</td> 395 * <td>IBM871</td> 396 * <td>ebcdic-cp-is csIBM871 </td> 397 * </tr> 398 * <tr> 399 * <td>Cp875</td> 400 * <td>?</td> 401 * <td></td> 402 * </tr> 403 * <tr> 404 * <td>Cp918</td> 405 * <td>IBM918</td> 406 * <td>ebcdic-cp-ar2 csIBM918 </td> 407 * </tr> 408 * <tr> 409 * <td>Cp921</td> 410 * <td>?</td> 411 * <td></td> 412 * </tr> 413 * <tr> 414 * <td>Cp922</td> 415 * <td>?</td> 416 * <td></td> 417 * </tr> 418 * <tr> 419 * <td>Cp930</td> 420 * <td>?</td> 421 * <td></td> 422 * </tr> 423 * <tr> 424 * <td>Cp933</td> 425 * <td>?</td> 426 * <td></td> 427 * </tr> 428 * <tr> 429 * <td>Cp935</td> 430 * <td>?</td> 431 * <td></td> 432 * </tr> 433 * <tr> 434 * <td>Cp937</td> 435 * <td>?</td> 436 * <td></td> 437 * </tr> 438 * <tr> 439 * <td>Cp939</td> 440 * <td>?</td> 441 * <td></td> 442 * </tr> 443 * <tr> 444 * <td>Cp942</td> 445 * <td>?</td> 446 * <td></td> 447 * </tr> 448 * <tr> 449 * <td>Cp942C</td> 450 * <td>?</td> 451 * <td></td> 452 * </tr> 453 * <tr> 454 * <td>Cp943</td> 455 * <td>?</td> 456 * <td></td> 457 * </tr> 458 * <tr> 459 * <td>Cp943C</td> 460 * <td>?</td> 461 * <td></td> 462 * </tr> 463 * <tr> 464 * <td>Cp948</td> 465 * <td>?</td> 466 * <td></td> 467 * </tr> 468 * <tr> 469 * <td>Cp949</td> 470 * <td>?</td> 471 * <td></td> 472 * </tr> 473 * <tr> 474 * <td>Cp949C</td> 475 * <td>?</td> 476 * <td></td> 477 * </tr> 478 * <tr> 479 * <td>Cp950</td> 480 * <td>?</td> 481 * <td></td> 482 * </tr> 483 * <tr> 484 * <td>Cp964</td> 485 * <td>?</td> 486 * <td></td> 487 * </tr> 488 * <tr> 489 * <td>Cp970</td> 490 * <td>?</td> 491 * <td></td> 492 * </tr> 493 * <tr> 494 * <td>EUC_CN</td> 495 * <td>GB2312</td> 496 * <td>x-EUC-CN csGB2312 euccn euc-cn gb2312-80 gb2312-1980 CN-GB CN-GB-ISOIR165 </td> 497 * </tr> 498 * <tr> 499 * <td>EUC_JP</td> 500 * <td>EUC-JP</td> 501 * <td>csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese eucjis x-eucjp eucjp x-euc-jp </td> 502 * </tr> 503 * <tr> 504 * <td>EUC_JP_LINUX</td> 505 * <td>?</td> 506 * <td></td> 507 * </tr> 508 * <tr> 509 * <td>EUC_JP_Solaris</td> 510 * <td>?</td> 511 * <td></td> 512 * </tr> 513 * <tr> 514 * <td>EUC_KR</td> 515 * <td>EUC-KR</td> 516 * <td>csEUCKR ksc5601 5601 ksc5601_1987 ksc_5601 ksc5601-1987 ks_c_5601-1987 euckr </td> 517 * </tr> 518 * <tr> 519 * <td>EUC_TW</td> 520 * <td>EUC-TW</td> 521 * <td>x-EUC-TW cns11643 euctw </td> 522 * </tr> 523 * <tr> 524 * <td>GB18030</td> 525 * <td>GB18030</td> 526 * <td>gb18030-2000 </td> 527 * </tr> 528 * <tr> 529 * <td>GBK</td> 530 * <td>windows-936</td> 531 * <td>CP936 MS936 ms_936 x-mswin-936 </td> 532 * </tr> 533 * <tr> 534 * <td>ISCII91</td> 535 * <td>?</td> 536 * <td>x-ISCII91 iscii </td> 537 * </tr> 538 * <tr> 539 * <td>ISO2022CN</td> 540 * <td>ISO-2022-CN</td> 541 * <td></td> 542 * </tr> 543 * <tr> 544 * <td>ISO2022JP</td> 545 * <td>ISO-2022-JP</td> 546 * <td>csISO2022JP JIS jis_encoding csjisencoding </td> 547 * </tr> 548 * <tr> 549 * <td>ISO2022KR</td> 550 * <td>ISO-2022-KR</td> 551 * <td>csISO2022KR </td> 552 * </tr> 553 * <tr> 554 * <td>ISO2022_CN_CNS</td> 555 * <td>?</td> 556 * <td></td> 557 * </tr> 558 * <tr> 559 * <td>ISO2022_CN_GB</td> 560 * <td>?</td> 561 * <td></td> 562 * </tr> 563 * <tr> 564 * <td>ISO8859_1</td> 565 * <td>ISO-8859-1</td> 566 * <td>ISO_8859-1:1987 iso-ir-100 ISO_8859-1 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 819 IBM-819 ISO8859-1 ISO_8859_1 </td> 567 * </tr> 568 * <tr> 569 * <td>ISO8859_13</td> 570 * <td>ISO-8859-13</td> 571 * <td></td> 572 * </tr> 573 * <tr> 574 * <td>ISO8859_15</td> 575 * <td>ISO-8859-15</td> 576 * <td>ISO_8859-15 Latin-9 8859_15 csISOlatin9 IBM923 cp923 923 L9 IBM-923 ISO8859-15 LATIN9 LATIN0 csISOlatin0 ISO8859_15_FDIS </td> 577 * </tr> 578 * <tr> 579 * <td>ISO8859_2</td> 580 * <td>ISO-8859-2</td> 581 * <td>ISO_8859-2:1987 iso-ir-101 ISO_8859-2 latin2 l2 csISOLatin2 8859_2 iso8859_2 </td> 582 * </tr> 583 * <tr> 584 * <td>ISO8859_3</td> 585 * <td>ISO-8859-3</td> 586 * <td>ISO_8859-3:1988 iso-ir-109 ISO_8859-3 latin3 l3 csISOLatin3 8859_3 </td> 587 * </tr> 588 * <tr> 589 * <td>ISO8859_4</td> 590 * <td>ISO-8859-4</td> 591 * <td>ISO_8859-4:1988 iso-ir-110 ISO_8859-4 latin4 l4 csISOLatin4 8859_4 </td> 592 * </tr> 593 * <tr> 594 * <td>ISO8859_5</td> 595 * <td>ISO-8859-5</td> 596 * <td>ISO_8859-5:1988 iso-ir-144 ISO_8859-5 cyrillic csISOLatinCyrillic 8859_5 </td> 597 * </tr> 598 * <tr> 599 * <td>ISO8859_6</td> 600 * <td>ISO-8859-6</td> 601 * <td>ISO_8859-6:1987 iso-ir-127 ISO_8859-6 ECMA-114 ASMO-708 arabic csISOLatinArabic 8859_6 </td> 602 * </tr> 603 * <tr> 604 * <td>ISO8859_7</td> 605 * <td>ISO-8859-7</td> 606 * <td>ISO_8859-7:1987 iso-ir-126 ISO_8859-7 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 sun_eu_greek </td> 607 * </tr> 608 * <tr> 609 * <td>ISO8859_8</td> 610 * <td>ISO-8859-8</td> 611 * <td>ISO_8859-8:1988 iso-ir-138 ISO_8859-8 hebrew csISOLatinHebrew 8859_8 </td> 612 * </tr> 613 * <tr> 614 * <td>ISO8859_9</td> 615 * <td>ISO-8859-9</td> 616 * <td>ISO_8859-9:1989 iso-ir-148 ISO_8859-9 latin5 l5 csISOLatin5 8859_9 </td> 617 * </tr> 618 * <tr> 619 * <td>JISAutoDetect</td> 620 * <td>?</td> 621 * <td></td> 622 * </tr> 623 * <tr> 624 * <td>JIS_C6626-1983</td> 625 * <td>JIS_C6626-1983</td> 626 * <td>x-JIS0208 JIS0208 csISO87JISX0208 x0208 JIS_X0208-1983 iso-ir-87 </td> 627 * </tr> 628 * <tr> 629 * <td>JIS_X0201</td> 630 * <td>JIS_X0201</td> 631 * <td>X0201 JIS0201 csHalfWidthKatakana </td> 632 * </tr> 633 * <tr> 634 * <td>JIS_X0212-1990</td> 635 * <td>JIS_X0212-1990</td> 636 * <td>iso-ir-159 x0212 JIS0212 csISO159JISX02121990 </td> 637 * </tr> 638 * <tr> 639 * <td>KOI8_R</td> 640 * <td>KOI8-R</td> 641 * <td>csKOI8R koi8 </td> 642 * </tr> 643 * <tr> 644 * <td>MS874</td> 645 * <td>windows-874</td> 646 * <td>cp874 </td> 647 * </tr> 648 * <tr> 649 * <td>MS932</td> 650 * <td>Windows-31J</td> 651 * <td>windows-932 csWindows31J x-ms-cp932 </td> 652 * </tr> 653 * <tr> 654 * <td>MS949</td> 655 * <td>windows-949</td> 656 * <td>windows949 ms_949 x-windows-949 </td> 657 * </tr> 658 * <tr> 659 * <td>MS950</td> 660 * <td>windows-950</td> 661 * <td>x-windows-950 </td> 662 * </tr> 663 * <tr> 664 * <td>MS950_HKSCS</td> 665 * <td></td> 666 * <td></td> 667 * </tr> 668 * <tr> 669 * <td>MacArabic</td> 670 * <td>?</td> 671 * <td></td> 672 * </tr> 673 * <tr> 674 * <td>MacCentralEurope</td> 675 * <td>?</td> 676 * <td></td> 677 * </tr> 678 * <tr> 679 * <td>MacCroatian</td> 680 * <td>?</td> 681 * <td></td> 682 * </tr> 683 * <tr> 684 * <td>MacCyrillic</td> 685 * <td>?</td> 686 * <td></td> 687 * </tr> 688 * <tr> 689 * <td>MacDingbat</td> 690 * <td>?</td> 691 * <td></td> 692 * </tr> 693 * <tr> 694 * <td>MacGreek</td> 695 * <td>MacGreek</td> 696 * <td></td> 697 * </tr> 698 * <tr> 699 * <td>MacHebrew</td> 700 * <td>?</td> 701 * <td></td> 702 * </tr> 703 * <tr> 704 * <td>MacIceland</td> 705 * <td>?</td> 706 * <td></td> 707 * </tr> 708 * <tr> 709 * <td>MacRoman</td> 710 * <td>MacRoman</td> 711 * <td>Macintosh MAC csMacintosh </td> 712 * </tr> 713 * <tr> 714 * <td>MacRomania</td> 715 * <td>?</td> 716 * <td></td> 717 * </tr> 718 * <tr> 719 * <td>MacSymbol</td> 720 * <td>?</td> 721 * <td></td> 722 * </tr> 723 * <tr> 724 * <td>MacThai</td> 725 * <td>?</td> 726 * <td></td> 727 * </tr> 728 * <tr> 729 * <td>MacTurkish</td> 730 * <td>?</td> 731 * <td></td> 732 * </tr> 733 * <tr> 734 * <td>MacUkraine</td> 735 * <td>?</td> 736 * <td></td> 737 * </tr> 738 * <tr> 739 * <td>SJIS</td> 740 * <td>Shift_JIS</td> 741 * <td>MS_Kanji csShiftJIS shift-jis x-sjis pck </td> 742 * </tr> 743 * <tr> 744 * <td>TIS620</td> 745 * <td>TIS-620</td> 746 * <td></td> 747 * </tr> 748 * <tr> 749 * <td>UTF-16</td> 750 * <td>UTF-16</td> 751 * <td>UTF_16 </td> 752 * </tr> 753 * <tr> 754 * <td>UTF8</td> 755 * <td>UTF-8</td> 756 * <td></td> 757 * </tr> 758 * <tr> 759 * <td>UnicodeBig</td> 760 * <td>?</td> 761 * <td></td> 762 * </tr> 763 * <tr> 764 * <td>UnicodeBigUnmarked</td> 765 * <td>UTF-16BE</td> 766 * <td>X-UTF-16BE UTF_16BE ISO-10646-UCS-2 </td> 767 * </tr> 768 * <tr> 769 * <td>UnicodeLittle</td> 770 * <td>?</td> 771 * <td></td> 772 * </tr> 773 * <tr> 774 * <td>UnicodeLittleUnmarked</td> 775 * <td>UTF-16LE</td> 776 * <td>UTF_16LE X-UTF-16LE </td> 777 * </tr> 778 * <tr> 779 * <td>x-Johab</td> 780 * <td>johab</td> 781 * <td>johab cp1361 ms1361 ksc5601-1992 ksc5601_1992 </td> 782 * </tr> 783 * <tr> 784 * <td>x-iso-8859-11</td> 785 * <td>?</td> 786 * <td></td> 787 * </tr> 788 * </table> 789 * 790 * 791 * @version $Id: CharsetUtil.java,v 1.1 2004/10/25 07:26:46 ntherning Exp $ 792 */ 793 public class CharsetUtil { 794 private static Log log = LogFactory.getLog(CharsetUtil.class); 795 796 private static class Charset implements Comparable<Charset> { 797 private String canonical = null; 798 private String mime = null; 799 private String[] aliases = null; 800 801 private Charset(String canonical, String mime, String[] aliases) { 802 this.canonical = canonical; 803 this.mime = mime; 804 this.aliases = aliases; 805 } 806 807 public int compareTo(Charset c) { 808 return this.canonical.compareTo(c.canonical); 809 } 810 } 811 812 private static Charset[] JAVA_CHARSETS = { 813 new Charset("ISO8859_1", "ISO-8859-1", 814 new String[] {"ISO_8859-1:1987", "iso-ir-100", "ISO_8859-1", 815 "latin1", "l1", "IBM819", "CP819", 816 "csISOLatin1", "8859_1", "819", "IBM-819", 817 "ISO8859-1", "ISO_8859_1"}), 818 new Charset("ISO8859_2", "ISO-8859-2", 819 new String[] {"ISO_8859-2:1987", "iso-ir-101", "ISO_8859-2", 820 "latin2", "l2", "csISOLatin2", "8859_2", 821 "iso8859_2"}), 822 new Charset("ISO8859_3", "ISO-8859-3", new String[] {"ISO_8859-3:1988", "iso-ir-109", "ISO_8859-3", "latin3", "l3", "csISOLatin3", "8859_3"}), 823 new Charset("ISO8859_4", "ISO-8859-4", 824 new String[] {"ISO_8859-4:1988", "iso-ir-110", "ISO_8859-4", 825 "latin4", "l4", "csISOLatin4", "8859_4"}), 826 new Charset("ISO8859_5", "ISO-8859-5", 827 new String[] {"ISO_8859-5:1988", "iso-ir-144", "ISO_8859-5", 828 "cyrillic", "csISOLatinCyrillic", "8859_5"}), 829 new Charset("ISO8859_6", "ISO-8859-6", new String[] {"ISO_8859-6:1987", "iso-ir-127", "ISO_8859-6", "ECMA-114", "ASMO-708", "arabic", "csISOLatinArabic", "8859_6"}), 830 new Charset("ISO8859_7", "ISO-8859-7", 831 new String[] {"ISO_8859-7:1987", "iso-ir-126", "ISO_8859-7", 832 "ELOT_928", "ECMA-118", "greek", "greek8", 833 "csISOLatinGreek", "8859_7", "sun_eu_greek"}), 834 new Charset("ISO8859_8", "ISO-8859-8", new String[] {"ISO_8859-8:1988", "iso-ir-138", "ISO_8859-8", "hebrew", "csISOLatinHebrew", "8859_8"}), 835 new Charset("ISO8859_9", "ISO-8859-9", 836 new String[] {"ISO_8859-9:1989", "iso-ir-148", "ISO_8859-9", 837 "latin5", "l5", "csISOLatin5", "8859_9"}), 838 839 new Charset("ISO8859_13", "ISO-8859-13", new String[] {}), 840 new Charset("ISO8859_15", "ISO-8859-15", 841 new String[] {"ISO_8859-15", "Latin-9", "8859_15", 842 "csISOlatin9", "IBM923", "cp923", "923", "L9", 843 "IBM-923", "ISO8859-15", "LATIN9", "LATIN0", 844 "csISOlatin0", "ISO8859_15_FDIS"}), 845 new Charset("KOI8_R", "KOI8-R", new String[] {"csKOI8R", "koi8"}), 846 new Charset("ASCII", "US-ASCII", 847 new String[] {"ANSI_X3.4-1968", "iso-ir-6", 848 "ANSI_X3.4-1986", "ISO_646.irv:1991", 849 "ISO646-US", "us", "IBM367", "cp367", 850 "csASCII", "ascii7", "646", "iso_646.irv:1983"}), 851 new Charset("UTF8", "UTF-8", new String[] {}), 852 new Charset("UTF-16", "UTF-16", new String[] {"UTF_16"}), 853 new Charset("UnicodeBigUnmarked", "UTF-16BE", new String[] {"X-UTF-16BE", "UTF_16BE", "ISO-10646-UCS-2"}), 854 new Charset("UnicodeLittleUnmarked", "UTF-16LE", new String[] {"UTF_16LE", "X-UTF-16LE"}), 855 new Charset("Big5", "Big5", new String[] {"csBig5", "CN-Big5", "BIG-FIVE", "BIGFIVE"}), 856 new Charset("Big5_HKSCS", "Big5-HKSCS", new String[] {"big5hkscs"}), 857 new Charset("EUC_JP", "EUC-JP", 858 new String[] {"csEUCPkdFmtJapanese", 859 "Extended_UNIX_Code_Packed_Format_for_Japanese", 860 "eucjis", "x-eucjp", "eucjp", "x-euc-jp"}), 861 new Charset("EUC_KR", "EUC-KR", 862 new String[] {"csEUCKR", "ksc5601", "5601", "ksc5601_1987", 863 "ksc_5601", "ksc5601-1987", "ks_c_5601-1987", 864 "euckr"}), 865 new Charset("GB18030", "GB18030", new String[] {"gb18030-2000"}), 866 new Charset("EUC_CN", "GB2312", new String[] {"x-EUC-CN", "csGB2312", "euccn", "euc-cn", "gb2312-80", "gb2312-1980", "CN-GB", "CN-GB-ISOIR165"}), 867 new Charset("GBK", "windows-936", new String[] {"CP936", "MS936", "ms_936", "x-mswin-936"}), 868 869 new Charset("Cp037", "IBM037", new String[] {"ebcdic-cp-us", "ebcdic-cp-ca", "ebcdic-cp-wt", "ebcdic-cp-nl", "csIBM037"}), 870 new Charset("Cp273", "IBM273", new String[] {"csIBM273"}), 871 new Charset("Cp277", "IBM277", new String[] {"EBCDIC-CP-DK", "EBCDIC-CP-NO", "csIBM277"}), 872 new Charset("Cp278", "IBM278", new String[] {"CP278", "ebcdic-cp-fi", "ebcdic-cp-se", "csIBM278"}), 873 new Charset("Cp280", "IBM280", new String[] {"ebcdic-cp-it", "csIBM280"}), 874 new Charset("Cp284", "IBM284", new String[] {"ebcdic-cp-es", "csIBM284"}), 875 new Charset("Cp285", "IBM285", new String[] {"ebcdic-cp-gb", "csIBM285"}), 876 new Charset("Cp297", "IBM297", new String[] {"ebcdic-cp-fr", "csIBM297"}), 877 new Charset("Cp420", "IBM420", new String[] {"ebcdic-cp-ar1", "csIBM420"}), 878 new Charset("Cp424", "IBM424", new String[] {"ebcdic-cp-he", "csIBM424"}), 879 new Charset("Cp437", "IBM437", new String[] {"437", "csPC8CodePage437"}), 880 new Charset("Cp500", "IBM500", new String[] {"ebcdic-cp-be", "ebcdic-cp-ch", "csIBM500"}), 881 new Charset("Cp775", "IBM775", new String[] {"csPC775Baltic"}), 882 new Charset("Cp838", "IBM-Thai", new String[] {}), 883 new Charset("Cp850", "IBM850", new String[] {"850", "csPC850Multilingual"}), 884 new Charset("Cp852", "IBM852", new String[] {"852", "csPCp852"}), 885 new Charset("Cp855", "IBM855", new String[] {"855", "csIBM855"}), 886 new Charset("Cp857", "IBM857", new String[] {"857", "csIBM857"}), 887 new Charset("Cp858", "IBM00858", 888 new String[] {"CCSID00858", "CP00858", 889 "PC-Multilingual-850+euro"}), 890 new Charset("Cp860", "IBM860", new String[] {"860", "csIBM860"}), 891 new Charset("Cp861", "IBM861", new String[] {"861", "cp-is", "csIBM861"}), 892 new Charset("Cp862", "IBM862", new String[] {"862", "csPC862LatinHebrew"}), 893 new Charset("Cp863", "IBM863", new String[] {"863", "csIBM863"}), 894 new Charset("Cp864", "IBM864", new String[] {"cp864", "csIBM864"}), 895 new Charset("Cp865", "IBM865", new String[] {"865", "csIBM865"}), 896 new Charset("Cp866", "IBM866", new String[] {"866", "csIBM866"}), 897 new Charset("Cp868", "IBM868", new String[] {"cp-ar", "csIBM868"}), 898 new Charset("Cp869", "IBM869", new String[] {"cp-gr", "csIBM869"}), 899 new Charset("Cp870", "IBM870", new String[] {"ebcdic-cp-roece", "ebcdic-cp-yu", "csIBM870"}), 900 new Charset("Cp871", "IBM871", new String[] {"ebcdic-cp-is", "csIBM871"}), 901 new Charset("Cp918", "IBM918", new String[] {"ebcdic-cp-ar2", "csIBM918"}), 902 new Charset("Cp1026", "IBM1026", new String[] {"csIBM1026"}), 903 new Charset("Cp1047", "IBM1047", new String[] {"IBM-1047"}), 904 new Charset("Cp1140", "IBM01140", 905 new String[] {"CCSID01140", "CP01140", 906 "ebcdic-us-37+euro"}), 907 new Charset("Cp1141", "IBM01141", 908 new String[] {"CCSID01141", "CP01141", 909 "ebcdic-de-273+euro"}), 910 new Charset("Cp1142", "IBM01142", new String[] {"CCSID01142", "CP01142", "ebcdic-dk-277+euro", "ebcdic-no-277+euro"}), 911 new Charset("Cp1143", "IBM01143", new String[] {"CCSID01143", "CP01143", "ebcdic-fi-278+euro", "ebcdic-se-278+euro"}), 912 new Charset("Cp1144", "IBM01144", new String[] {"CCSID01144", "CP01144", "ebcdic-it-280+euro"}), 913 new Charset("Cp1145", "IBM01145", new String[] {"CCSID01145", "CP01145", "ebcdic-es-284+euro"}), 914 new Charset("Cp1146", "IBM01146", new String[] {"CCSID01146", "CP01146", "ebcdic-gb-285+euro"}), 915 new Charset("Cp1147", "IBM01147", new String[] {"CCSID01147", "CP01147", "ebcdic-fr-297+euro"}), 916 new Charset("Cp1148", "IBM01148", new String[] {"CCSID01148", "CP01148", "ebcdic-international-500+euro"}), 917 new Charset("Cp1149", "IBM01149", new String[] {"CCSID01149", "CP01149", "ebcdic-is-871+euro"}), 918 new Charset("Cp1250", "windows-1250", new String[] {}), 919 new Charset("Cp1251", "windows-1251", new String[] {}), 920 new Charset("Cp1252", "windows-1252", new String[] {}), 921 new Charset("Cp1253", "windows-1253", new String[] {}), 922 new Charset("Cp1254", "windows-1254", new String[] {}), 923 new Charset("Cp1255", "windows-1255", new String[] {}), 924 new Charset("Cp1256", "windows-1256", new String[] {}), 925 new Charset("Cp1257", "windows-1257", new String[] {}), 926 new Charset("Cp1258", "windows-1258", new String[] {}), 927 new Charset("ISO2022CN", "ISO-2022-CN", new String[] {}), 928 new Charset("ISO2022JP", "ISO-2022-JP", new String[] {"csISO2022JP", "JIS", "jis_encoding", "csjisencoding"}), 929 new Charset("ISO2022KR", "ISO-2022-KR", new String[] {"csISO2022KR"}), 930 new Charset("JIS_X0201", "JIS_X0201", new String[] {"X0201", "JIS0201", "csHalfWidthKatakana"}), 931 new Charset("JIS_X0212-1990", "JIS_X0212-1990", new String[] {"iso-ir-159", "x0212", "JIS0212", "csISO159JISX02121990"}), 932 new Charset("JIS_C6626-1983", "JIS_C6626-1983", new String[] {"x-JIS0208", "JIS0208", "csISO87JISX0208", "x0208", "JIS_X0208-1983", "iso-ir-87"}), 933 new Charset("SJIS", "Shift_JIS", new String[] {"MS_Kanji", "csShiftJIS", "shift-jis", "x-sjis", "pck"}), 934 new Charset("TIS620", "TIS-620", new String[] {}), 935 new Charset("MS932", "Windows-31J", new String[] {"windows-932", "csWindows31J", "x-ms-cp932"}), 936 new Charset("EUC_TW", "EUC-TW", new String[] {"x-EUC-TW", "cns11643", "euctw"}), 937 new Charset("x-Johab", "johab", new String[] {"johab", "cp1361", "ms1361", "ksc5601-1992", "ksc5601_1992"}), 938 new Charset("MS950_HKSCS", "", new String[] {}), 939 new Charset("MS874", "windows-874", new String[] {"cp874"}), 940 new Charset("MS949", "windows-949", new String[] {"windows949", "ms_949", "x-windows-949"}), 941 new Charset("MS950", "windows-950", new String[] {"x-windows-950"}), 942 943 new Charset("Cp737", null, new String[] {}), 944 new Charset("Cp856", null, new String[] {}), 945 new Charset("Cp875", null, new String[] {}), 946 new Charset("Cp921", null, new String[] {}), 947 new Charset("Cp922", null, new String[] {}), 948 new Charset("Cp930", null, new String[] {}), 949 new Charset("Cp933", null, new String[] {}), 950 new Charset("Cp935", null, new String[] {}), 951 new Charset("Cp937", null, new String[] {}), 952 new Charset("Cp939", null, new String[] {}), 953 new Charset("Cp942", null, new String[] {}), 954 new Charset("Cp942C", null, new String[] {}), 955 new Charset("Cp943", null, new String[] {}), 956 new Charset("Cp943C", null, new String[] {}), 957 new Charset("Cp948", null, new String[] {}), 958 new Charset("Cp949", null, new String[] {}), 959 new Charset("Cp949C", null, new String[] {}), 960 new Charset("Cp950", null, new String[] {}), 961 new Charset("Cp964", null, new String[] {}), 962 new Charset("Cp970", null, new String[] {}), 963 new Charset("Cp1006", null, new String[] {}), 964 new Charset("Cp1025", null, new String[] {}), 965 new Charset("Cp1046", null, new String[] {}), 966 new Charset("Cp1097", null, new String[] {}), 967 new Charset("Cp1098", null, new String[] {}), 968 new Charset("Cp1112", null, new String[] {}), 969 new Charset("Cp1122", null, new String[] {}), 970 new Charset("Cp1123", null, new String[] {}), 971 new Charset("Cp1124", null, new String[] {}), 972 new Charset("Cp1381", null, new String[] {}), 973 new Charset("Cp1383", null, new String[] {}), 974 new Charset("Cp33722", null, new String[] {}), 975 new Charset("Big5_Solaris", null, new String[] {}), 976 new Charset("EUC_JP_LINUX", null, new String[] {}), 977 new Charset("EUC_JP_Solaris", null, new String[] {}), 978 new Charset("ISCII91", null, new String[] {"x-ISCII91", "iscii"}), 979 new Charset("ISO2022_CN_CNS", null, new String[] {}), 980 new Charset("ISO2022_CN_GB", null, new String[] {}), 981 new Charset("x-iso-8859-11", null, new String[] {}), 982 new Charset("JISAutoDetect", null, new String[] {}), 983 new Charset("MacArabic", null, new String[] {}), 984 new Charset("MacCentralEurope", null, new String[] {}), 985 new Charset("MacCroatian", null, new String[] {}), 986 new Charset("MacCyrillic", null, new String[] {}), 987 new Charset("MacDingbat", null, new String[] {}), 988 new Charset("MacGreek", "MacGreek", new String[] {}), 989 new Charset("MacHebrew", null, new String[] {}), 990 new Charset("MacIceland", null, new String[] {}), 991 new Charset("MacRoman", "MacRoman", new String[] {"Macintosh", "MAC", "csMacintosh"}), 992 new Charset("MacRomania", null, new String[] {}), 993 new Charset("MacSymbol", null, new String[] {}), 994 new Charset("MacThai", null, new String[] {}), 995 new Charset("MacTurkish", null, new String[] {}), 996 new Charset("MacUkraine", null, new String[] {}), 997 new Charset("UnicodeBig", null, new String[] {}), 998 new Charset("UnicodeLittle", null, new String[] {}) 999 }; 1000 1001 /** 1002 * Contains the canonical names of character sets which can be used to 1003 * decode bytes into Java chars. 1004 */ 1005 private static TreeSet<String> decodingSupported = null; 1006 1007 /** 1008 * Contains the canonical names of character sets which can be used to 1009 * encode Java chars into bytes. 1010 */ 1011 private static TreeSet<String> encodingSupported = null; 1012 1013 /** 1014 * Maps character set names to Charset objects. All possible names of 1015 * a charset will be mapped to the Charset. 1016 */ 1017 private static HashMap<String, Charset> charsetMap = null; 1018 1019 static { 1020 decodingSupported = new TreeSet<String>(); 1021 encodingSupported = new TreeSet<String>(); 1022 byte[] dummy = new byte[] {'d', 'u', 'm', 'm', 'y'}; 1023 for (int i = 0; i < JAVA_CHARSETS.length; i++) { 1024 try { 1025 String s = new String(dummy, JAVA_CHARSETS[i].canonical); 1026 decodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase()); 1027 } catch (UnsupportedOperationException e) { 1028 } catch (UnsupportedEncodingException e) { 1029 } 1030 try { 1031 "dummy".getBytes(JAVA_CHARSETS[i].canonical); 1032 encodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase()); 1033 } catch (UnsupportedOperationException e) { 1034 } catch (UnsupportedEncodingException e) { 1035 } 1036 } 1037 1038 charsetMap = new HashMap<String, Charset>(); 1039 for (int i = 0; i < JAVA_CHARSETS.length; i++) { 1040 Charset c = JAVA_CHARSETS[i]; 1041 charsetMap.put(c.canonical.toLowerCase(), c); 1042 if (c.mime != null) { 1043 charsetMap.put(c.mime.toLowerCase(), c); 1044 } 1045 if (c.aliases != null) { 1046 for (int j = 0; j < c.aliases.length; j++) { 1047 charsetMap.put(c.aliases[j].toLowerCase(), c); 1048 } 1049 } 1050 } 1051 1052 if (log.isDebugEnabled()) { 1053 log.debug("Character sets which support decoding: " 1054 + decodingSupported); 1055 log.debug("Character sets which support encoding: " 1056 + encodingSupported); 1057 } 1058 } 1059 1060 /** 1061 * ANDROID: THE FOLLOWING SET OF STATIC STRINGS ARE COPIED FROM A NEWER VERSION OF MIME4J 1062 */ 1063 1064 /** carriage return - line feed sequence */ 1065 public static final String CRLF = "\r\n"; 1066 1067 /** US-ASCII CR, carriage return (13) */ 1068 public static final int CR = '\r'; 1069 1070 /** US-ASCII LF, line feed (10) */ 1071 public static final int LF = '\n'; 1072 1073 /** US-ASCII SP, space (32) */ 1074 public static final int SP = ' '; 1075 1076 /** US-ASCII HT, horizontal-tab (9)*/ 1077 public static final int HT = '\t'; 1078 1079 public static final java.nio.charset.Charset US_ASCII = java.nio.charset.Charset 1080 .forName("US-ASCII"); 1081 1082 public static final java.nio.charset.Charset ISO_8859_1 = java.nio.charset.Charset 1083 .forName("ISO-8859-1"); 1084 1085 public static final java.nio.charset.Charset UTF_8 = java.nio.charset.Charset 1086 .forName("UTF-8"); 1087 1088 /** 1089 * Returns <code>true</code> if the specified character is a whitespace 1090 * character (CR, LF, SP or HT). 1091 * 1092 * ANDROID: COPIED FROM A NEWER VERSION OF MIME4J 1093 * 1094 * @param ch 1095 * character to test. 1096 * @return <code>true</code> if the specified character is a whitespace 1097 * character, <code>false</code> otherwise. 1098 */ 1099 public static boolean isWhitespace(char ch) { 1100 return ch == SP || ch == HT || ch == CR || ch == LF; 1101 } 1102 1103 /** 1104 * Returns <code>true</code> if the specified string consists entirely of 1105 * whitespace characters. 1106 * 1107 * ANDROID: COPIED FROM A NEWER VERSION OF MIME4J 1108 * 1109 * @param s 1110 * string to test. 1111 * @return <code>true</code> if the specified string consists entirely of 1112 * whitespace characters, <code>false</code> otherwise. 1113 */ 1114 public static boolean isWhitespace(final String s) { 1115 if (s == null) { 1116 throw new IllegalArgumentException("String may not be null"); 1117 } 1118 final int len = s.length(); 1119 for (int i = 0; i < len; i++) { 1120 if (!isWhitespace(s.charAt(i))) { 1121 return false; 1122 } 1123 } 1124 return true; 1125 } 1126 1127 /** 1128 * Determines if the VM supports encoding (chars to bytes) the 1129 * specified character set. NOTE: the given character set name may 1130 * not be known to the VM even if this method returns <code>true</code>. 1131 * Use {@link #toJavaCharset(String)} to get the canonical Java character 1132 * set name. 1133 * 1134 * @param charsetName the characters set name. 1135 * @return <code>true</code> if encoding is supported, <code>false</code> 1136 * otherwise. 1137 */ 1138 public static boolean isEncodingSupported(String charsetName) { 1139 return encodingSupported.contains(charsetName.toLowerCase()); 1140 } 1141 1142 /** 1143 * Determines if the VM supports decoding (bytes to chars) the 1144 * specified character set. NOTE: the given character set name may 1145 * not be known to the VM even if this method returns <code>true</code>. 1146 * Use {@link #toJavaCharset(String)} to get the canonical Java character 1147 * set name. 1148 * 1149 * @param charsetName the characters set name. 1150 * @return <code>true</code> if decoding is supported, <code>false</code> 1151 * otherwise. 1152 */ 1153 public static boolean isDecodingSupported(String charsetName) { 1154 return decodingSupported.contains(charsetName.toLowerCase()); 1155 } 1156 1157 /** 1158 * Gets the preferred MIME character set name for the specified 1159 * character set or <code>null</code> if not known. 1160 * 1161 * @param charsetName the character set name to look for. 1162 * @return the MIME preferred name or <code>null</code> if not known. 1163 */ 1164 public static String toMimeCharset(String charsetName) { 1165 Charset c = charsetMap.get(charsetName.toLowerCase()); 1166 if (c != null) { 1167 return c.mime; 1168 } 1169 return null; 1170 } 1171 1172 /** 1173 * Gets the canonical Java character set name for the specified 1174 * character set or <code>null</code> if not known. This should be 1175 * called before doing any conversions using the Java API. NOTE: 1176 * you must use {@link #isEncodingSupported(String)} or 1177 * {@link #isDecodingSupported(String)} to make sure the returned 1178 * Java character set is supported by the current VM. 1179 * 1180 * @param charsetName the character set name to look for. 1181 * @return the canonical Java name or <code>null</code> if not known. 1182 */ 1183 public static String toJavaCharset(String charsetName) { 1184 Charset c = charsetMap.get(charsetName.toLowerCase()); 1185 if (c != null) { 1186 return c.canonical; 1187 } 1188 return null; 1189 } 1190 1191 public static java.nio.charset.Charset getCharset(String charsetName) { 1192 String defaultCharset = "ISO-8859-1"; 1193 1194 // Use the default chareset if given charset is null 1195 if(charsetName == null) charsetName = defaultCharset; 1196 1197 try { 1198 return java.nio.charset.Charset.forName(charsetName); 1199 } catch (IllegalCharsetNameException e) { 1200 log.info("Illegal charset " + charsetName + ", fallback to " + defaultCharset + ": " + e); 1201 // Use default charset on exception 1202 return java.nio.charset.Charset.forName(defaultCharset); 1203 } catch (UnsupportedCharsetException ex) { 1204 log.info("Unsupported charset " + charsetName + ", fallback to " + defaultCharset + ": " + ex); 1205 // Use default charset on exception 1206 return java.nio.charset.Charset.forName(defaultCharset); 1207 } 1208 1209 } 1210 /* 1211 * Uncomment the code below and run the main method to regenerate the 1212 * Javadoc table above when the known charsets change. 1213 */ 1214 1215 /* 1216 private static String dumpHtmlTable() { 1217 LinkedList l = new LinkedList(Arrays.asList(JAVA_CHARSETS)); 1218 Collections.sort(l); 1219 StringBuffer sb = new StringBuffer(); 1220 sb.append(" * <table>\n"); 1221 sb.append(" * <tr>\n"); 1222 sb.append(" * <td>Canonical (Java) name</td>\n"); 1223 sb.append(" * <td>MIME preferred</td>\n"); 1224 sb.append(" * <td>Aliases</td>\n"); 1225 sb.append(" * </tr>\n"); 1226 1227 for (Iterator it = l.iterator(); it.hasNext();) { 1228 Charset c = (Charset) it.next(); 1229 sb.append(" * <tr>\n"); 1230 sb.append(" * <td>" + c.canonical + "</td>\n"); 1231 sb.append(" * <td>" + (c.mime == null ? "?" : c.mime)+ "</td>\n"); 1232 sb.append(" * <td>"); 1233 for (int i = 0; c.aliases != null && i < c.aliases.length; i++) { 1234 sb.append(c.aliases[i] + " "); 1235 } 1236 sb.append("</td>\n"); 1237 sb.append(" * </tr>\n"); 1238 } 1239 sb.append(" * </table>\n"); 1240 return sb.toString(); 1241 } 1242 1243 public static void main(String[] args) { 1244 System.out.println(dumpHtmlTable()); 1245 }*/ 1246 } 1247