#! /bin/sh
# Output a system dependent table of character encoding aliases.
#
#   Copyright (C) 2000-2004, 2006-2009 Free Software Foundation, Inc.
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 3, or (at your option)
#   any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License along
#   with this program; if not, write to the Free Software Foundation,
#   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# The table consists of lines of the form
#    ALIAS  CANONICAL
#
# ALIAS is the (system dependent) result of "nl_langinfo (CODESET)".
# ALIAS is compared in a case sensitive way.
#
# CANONICAL is the GNU canonical name for this character encoding.
# It must be an encoding supported by libiconv. Support by GNU libc is
# also desirable. CANONICAL is case insensitive. Usually an upper case
# MIME charset name is preferred.
# The current list of GNU canonical charset names is as follows.
#
#       name              MIME?             used by which systems
#   ASCII, ANSI_X3.4-1968       glibc solaris freebsd netbsd darwin
#   ISO-8859-1              Y   glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin
#   ISO-8859-2              Y   glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin
#   ISO-8859-3              Y   glibc solaris
#   ISO-8859-4              Y   osf solaris freebsd netbsd openbsd darwin
#   ISO-8859-5              Y   glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin
#   ISO-8859-6              Y   glibc aix hpux solaris
#   ISO-8859-7              Y   glibc aix hpux irix osf solaris netbsd openbsd darwin
#   ISO-8859-8              Y   glibc aix hpux osf solaris
#   ISO-8859-9              Y   glibc aix hpux irix osf solaris darwin
#   ISO-8859-13                 glibc netbsd openbsd darwin
#   ISO-8859-14                 glibc
#   ISO-8859-15                 glibc aix osf solaris freebsd netbsd openbsd darwin
#   KOI8-R                  Y   glibc solaris freebsd netbsd openbsd darwin
#   KOI8-U                  Y   glibc freebsd netbsd openbsd darwin
#   KOI8-T                      glibc
#   CP437                       dos
#   CP775                       dos
#   CP850                       aix osf dos
#   CP852                       dos
#   CP855                       dos
#   CP856                       aix
#   CP857                       dos
#   CP861                       dos
#   CP862                       dos
#   CP864                       dos
#   CP865                       dos
#   CP866                       freebsd netbsd openbsd darwin dos
#   CP869                       dos
#   CP874                       woe32 dos
#   CP922                       aix
#   CP932                       aix woe32 dos
#   CP943                       aix
#   CP949                       osf darwin woe32 dos
#   CP950                       woe32 dos
#   CP1046                      aix
#   CP1124                      aix
#   CP1125                      dos
#   CP1129                      aix
#   CP1131                      darwin
#   CP1250                      woe32
#   CP1251                      glibc solaris netbsd openbsd darwin woe32
#   CP1252                      aix woe32
#   CP1253                      woe32
#   CP1254                      woe32
#   CP1255                      glibc woe32
#   CP1256                      woe32
#   CP1257                      woe32
#   GB2312                  Y   glibc aix hpux irix solaris freebsd netbsd darwin
#   EUC-JP                  Y   glibc aix hpux irix osf solaris freebsd netbsd darwin
#   EUC-KR                  Y   glibc aix hpux irix osf solaris freebsd netbsd darwin
#   EUC-TW                      glibc aix hpux irix osf solaris netbsd
#   BIG5                    Y   glibc aix hpux osf solaris freebsd netbsd darwin
#   BIG5-HKSCS                  glibc solaris darwin
#   GBK                         glibc aix osf solaris darwin woe32 dos
#   GB18030                     glibc solaris netbsd darwin
#   SHIFT_JIS               Y   hpux osf solaris freebsd netbsd darwin
#   JOHAB                       glibc solaris woe32
#   TIS-620                     glibc aix hpux osf solaris
#   VISCII                  Y   glibc
#   TCVN5712-1                  glibc
#   ARMSCII-8                   glibc darwin
#   GEORGIAN-PS                 glibc
#   PT154                       glibc
#   HP-ROMAN8                   hpux
#   HP-ARABIC8                  hpux
#   HP-GREEK8                   hpux
#   HP-HEBREW8                  hpux
#   HP-TURKISH8                 hpux
#   HP-KANA8                    hpux
#   DEC-KANJI                   osf
#   DEC-HANYU                   osf
#   UTF-8                   Y   glibc aix hpux osf solaris netbsd darwin
#
# Note: Names which are not marked as being a MIME name should not be used in
# Internet protocols for information interchange (mail, news, etc.).
#
# Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications
# must understand both names and treat them as equivalent.
#
# The first argument passed to this file is the canonical host specification,
#    CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
# or
#    CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM

    118 host="$1"
    119 os=`echo "$host" | sed -e 's/^[^-]*-[^-]*-\(.*\)$/\1/'`
    120 echo "# This file contains a table of character encoding aliases,"
    121 echo "# suitable for operating system '${os}'."
    122 echo "# It was automatically generated from config.charset."
    123 # List of references, updated during installation:
    124 echo "# Packages using this file: "
    125 case "$os" in
    126     linux-gnulibc1*)
    127 	# Linux libc5 doesn't have nl_langinfo(CODESET); therefore
    128 	# localcharset.c falls back to using the full locale name
    129 	# from the environment variables.
    130 	echo "C ASCII"
    131 	echo "POSIX ASCII"
    132 	for l in af af_ZA ca ca_ES da da_DK de de_AT de_BE de_CH de_DE de_LU \
    133 	         en en_AU en_BW en_CA en_DK en_GB en_IE en_NZ en_US en_ZA \
    134 	         en_ZW es es_AR es_BO es_CL es_CO es_DO es_EC es_ES es_GT \
    135 	         es_HN es_MX es_PA es_PE es_PY es_SV es_US es_UY es_VE et \
    136 	         et_EE eu eu_ES fi fi_FI fo fo_FO fr fr_BE fr_CA fr_CH fr_FR \
    137 	         fr_LU ga ga_IE gl gl_ES id id_ID in in_ID is is_IS it it_CH \
    138 	         it_IT kl kl_GL nl nl_BE nl_NL no no_NO pt pt_BR pt_PT sv \
    139 	         sv_FI sv_SE; do
    140 	  echo "$l ISO-8859-1"
    141 	  echo "$l.iso-8859-1 ISO-8859-1"
    142 	  echo "$l.iso-8859-15 ISO-8859-15"
    143 	  echo "$l.iso-8859-15@euro ISO-8859-15"
    144 	  echo "$l@euro ISO-8859-15"
    145 	  echo "$l.cp-437 CP437"
    146 	  echo "$l.cp-850 CP850"
    147 	  echo "$l.cp-1252 CP1252"
    148 	  echo "$l.cp-1252@euro CP1252"
    149 	  #echo "$l.atari-st ATARI-ST" # not a commonly used encoding
    150 	  echo "$l.utf-8 UTF-8"
    151 	  echo "$l.utf-8@euro UTF-8"
    152 	done
    153 	for l in cs cs_CZ hr hr_HR hu hu_HU pl pl_PL ro ro_RO sk sk_SK sl \
    154 	         sl_SI sr sr_CS sr_YU; do
    155 	  echo "$l ISO-8859-2"
    156 	  echo "$l.iso-8859-2 ISO-8859-2"
    157 	  echo "$l.cp-852 CP852"
    158 	  echo "$l.cp-1250 CP1250"
    159 	  echo "$l.utf-8 UTF-8"
    160 	done
    161 	for l in mk mk_MK ru ru_RU; do
    162 	  echo "$l ISO-8859-5"
    163 	  echo "$l.iso-8859-5 ISO-8859-5"
    164 	  echo "$l.koi8-r KOI8-R"
    165 	  echo "$l.cp-866 CP866"
    166 	  echo "$l.cp-1251 CP1251"
    167 	  echo "$l.utf-8 UTF-8"
    168 	done
    169 	for l in ar ar_SA; do
    170 	  echo "$l ISO-8859-6"
    171 	  echo "$l.iso-8859-6 ISO-8859-6"
    172 	  echo "$l.cp-864 CP864"
    173 	  #echo "$l.cp-868 CP868" # not a commonly used encoding
    174 	  echo "$l.cp-1256 CP1256"
    175 	  echo "$l.utf-8 UTF-8"
    176 	done
    177 	for l in el el_GR gr gr_GR; do
    178 	  echo "$l ISO-8859-7"
    179 	  echo "$l.iso-8859-7 ISO-8859-7"
    180 	  echo "$l.cp-869 CP869"
    181 	  echo "$l.cp-1253 CP1253"
    182 	  echo "$l.cp-1253@euro CP1253"
    183 	  echo "$l.utf-8 UTF-8"
    184 	  echo "$l.utf-8@euro UTF-8"
    185 	done
    186 	for l in he he_IL iw iw_IL; do
    187 	  echo "$l ISO-8859-8"
    188 	  echo "$l.iso-8859-8 ISO-8859-8"
    189 	  echo "$l.cp-862 CP862"
    190 	  echo "$l.cp-1255 CP1255"
    191 	  echo "$l.utf-8 UTF-8"
    192 	done
    193 	for l in tr tr_TR; do
    194 	  echo "$l ISO-8859-9"
    195 	  echo "$l.iso-8859-9 ISO-8859-9"
    196 	  echo "$l.cp-857 CP857"
    197 	  echo "$l.cp-1254 CP1254"
    198 	  echo "$l.utf-8 UTF-8"
    199 	done
    200 	for l in lt lt_LT lv lv_LV; do
    201 	  #echo "$l BALTIC" # not a commonly used encoding, wrong encoding name
    202 	  echo "$l ISO-8859-13"
    203 	done
    204 	for l in ru_UA uk uk_UA; do
    205 	  echo "$l KOI8-U"
    206 	done
    207 	for l in zh zh_CN; do
    208 	  #echo "$l GB_2312-80" # not a commonly used encoding, wrong encoding name
    209 	  echo "$l GB2312"
    210 	done
    211 	for l in ja ja_JP ja_JP.EUC; do
    212 	  echo "$l EUC-JP"
    213 	done
    214 	for l in ko ko_KR; do
    215 	  echo "$l EUC-KR"
    216 	done
    217 	for l in th th_TH; do
    218 	  echo "$l TIS-620"
    219 	done
    220 	for l in fa fa_IR; do
    221 	  #echo "$l ISIRI-3342" # a broken encoding
    222 	  echo "$l.utf-8 UTF-8"
    223 	done
    224 	;;
    225     linux* | *-gnu*)
    226 	# With glibc-2.1 or newer, we don't need any canonicalization,
    227 	# because glibc has iconv and both glibc and libiconv support all
    228 	# GNU canonical names directly. Therefore, the Makefile does not
    229 	# need to install the alias file at all.
    230 	# The following applies only to glibc-2.0.x and older libcs.
    231 	echo "ISO_646.IRV:1983 ASCII"
    232 	;;
    233     aix*)
    234 	echo "ISO8859-1 ISO-8859-1"
    235 	echo "ISO8859-2 ISO-8859-2"
    236 	echo "ISO8859-5 ISO-8859-5"
    237 	echo "ISO8859-6 ISO-8859-6"
    238 	echo "ISO8859-7 ISO-8859-7"
    239 	echo "ISO8859-8 ISO-8859-8"
    240 	echo "ISO8859-9 ISO-8859-9"
    241 	echo "ISO8859-15 ISO-8859-15"
    242 	echo "IBM-850 CP850"
    243 	echo "IBM-856 CP856"
    244 	echo "IBM-921 ISO-8859-13"
    245 	echo "IBM-922 CP922"
    246 	echo "IBM-932 CP932"
    247 	echo "IBM-943 CP943"
    248 	echo "IBM-1046 CP1046"
    249 	echo "IBM-1124 CP1124"
    250 	echo "IBM-1129 CP1129"
    251 	echo "IBM-1252 CP1252"
    252 	echo "IBM-eucCN GB2312"
    253 	echo "IBM-eucJP EUC-JP"
    254 	echo "IBM-eucKR EUC-KR"
    255 	echo "IBM-eucTW EUC-TW"
    256 	echo "big5 BIG5"
    257 	echo "GBK GBK"
    258 	echo "TIS-620 TIS-620"
    259 	echo "UTF-8 UTF-8"
    260 	;;
    261     hpux*)
    262 	echo "iso88591 ISO-8859-1"
    263 	echo "iso88592 ISO-8859-2"
    264 	echo "iso88595 ISO-8859-5"
    265 	echo "iso88596 ISO-8859-6"
    266 	echo "iso88597 ISO-8859-7"
    267 	echo "iso88598 ISO-8859-8"
    268 	echo "iso88599 ISO-8859-9"
    269 	echo "iso885915 ISO-8859-15"
    270 	echo "roman8 HP-ROMAN8"
    271 	echo "arabic8 HP-ARABIC8"
    272 	echo "greek8 HP-GREEK8"
    273 	echo "hebrew8 HP-HEBREW8"
    274 	echo "turkish8 HP-TURKISH8"
    275 	echo "kana8 HP-KANA8"
    276 	echo "tis620 TIS-620"
    277 	echo "big5 BIG5"
    278 	echo "eucJP EUC-JP"
    279 	echo "eucKR EUC-KR"
    280 	echo "eucTW EUC-TW"
    281 	echo "hp15CN GB2312"
    282 	#echo "ccdc ?" # what is this?
    283 	echo "SJIS SHIFT_JIS"
    284 	echo "utf8 UTF-8"
    285 	;;
    286     irix*)
    287 	echo "ISO8859-1 ISO-8859-1"
    288 	echo "ISO8859-2 ISO-8859-2"
    289 	echo "ISO8859-5 ISO-8859-5"
    290 	echo "ISO8859-7 ISO-8859-7"
    291 	echo "ISO8859-9 ISO-8859-9"
    292 	echo "eucCN GB2312"
    293 	echo "eucJP EUC-JP"
    294 	echo "eucKR EUC-KR"
    295 	echo "eucTW EUC-TW"
    296 	;;
    297     osf*)
    298 	echo "ISO8859-1 ISO-8859-1"
    299 	echo "ISO8859-2 ISO-8859-2"
    300 	echo "ISO8859-4 ISO-8859-4"
    301 	echo "ISO8859-5 ISO-8859-5"
    302 	echo "ISO8859-7 ISO-8859-7"
    303 	echo "ISO8859-8 ISO-8859-8"
    304 	echo "ISO8859-9 ISO-8859-9"
    305 	echo "ISO8859-15 ISO-8859-15"
    306 	echo "cp850 CP850"
    307 	echo "big5 BIG5"
    308 	echo "dechanyu DEC-HANYU"
    309 	echo "dechanzi GB2312"
    310 	echo "deckanji DEC-KANJI"
    311 	echo "deckorean EUC-KR"
    312 	echo "eucJP EUC-JP"
    313 	echo "eucKR EUC-KR"
    314 	echo "eucTW EUC-TW"
    315 	echo "GBK GBK"
    316 	echo "KSC5601 CP949"
    317 	echo "sdeckanji EUC-JP"
    318 	echo "SJIS SHIFT_JIS"
    319 	echo "TACTIS TIS-620"
    320 	echo "UTF-8 UTF-8"
    321 	;;
    322     solaris*)
    323 	echo "646 ASCII"
    324 	echo "ISO8859-1 ISO-8859-1"
    325 	echo "ISO8859-2 ISO-8859-2"
    326 	echo "ISO8859-3 ISO-8859-3"
    327 	echo "ISO8859-4 ISO-8859-4"
    328 	echo "ISO8859-5 ISO-8859-5"
    329 	echo "ISO8859-6 ISO-8859-6"
    330 	echo "ISO8859-7 ISO-8859-7"
    331 	echo "ISO8859-8 ISO-8859-8"
    332 	echo "ISO8859-9 ISO-8859-9"
    333 	echo "ISO8859-15 ISO-8859-15"
    334 	echo "koi8-r KOI8-R"
    335 	echo "ansi-1251 CP1251"
    336 	echo "BIG5 BIG5"
    337 	echo "Big5-HKSCS BIG5-HKSCS"
    338 	echo "gb2312 GB2312"
    339 	echo "GBK GBK"
    340 	echo "GB18030 GB18030"
    341 	echo "cns11643 EUC-TW"
    342 	echo "5601 EUC-KR"
    343 	echo "ko_KR.johap92 JOHAB"
    344 	echo "eucJP EUC-JP"
    345 	echo "PCK SHIFT_JIS"
    346 	echo "TIS620.2533 TIS-620"
    347 	#echo "sun_eu_greek ?" # what is this?
    348 	echo "UTF-8 UTF-8"
    349 	;;
    350     freebsd* | os2*)
    351 	# FreeBSD 4.2 doesn't have nl_langinfo(CODESET); therefore
    352 	# localcharset.c falls back to using the full locale name
    353 	# from the environment variables.
    354 	# Likewise for OS/2. OS/2 has XFree86 just like FreeBSD. Just
    355 	# reuse FreeBSD's locale data for OS/2.
    356 	echo "C ASCII"
    357 	echo "US-ASCII ASCII"
    358 	for l in la_LN lt_LN; do
    359 	  echo "$l.ASCII ASCII"
    360 	done
    361 	for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \
    362 	         fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT la_LN \
    363 	         lt_LN nl_BE nl_NL no_NO pt_PT sv_SE; do
    364 	  echo "$l.ISO_8859-1 ISO-8859-1"
    365 	  echo "$l.DIS_8859-15 ISO-8859-15"
    366 	done
    367 	for l in cs_CZ hr_HR hu_HU la_LN lt_LN pl_PL sl_SI; do
    368 	  echo "$l.ISO_8859-2 ISO-8859-2"
    369 	done
    370 	for l in la_LN lt_LT; do
    371 	  echo "$l.ISO_8859-4 ISO-8859-4"
    372 	done
    373 	for l in ru_RU ru_SU; do
    374 	  echo "$l.KOI8-R KOI8-R"
    375 	  echo "$l.ISO_8859-5 ISO-8859-5"
    376 	  echo "$l.CP866 CP866"
    377 	done
    378 	echo "uk_UA.KOI8-U KOI8-U"
    379 	echo "zh_TW.BIG5 BIG5"
    380 	echo "zh_TW.Big5 BIG5"
    381 	echo "zh_CN.EUC GB2312"
    382 	echo "ja_JP.EUC EUC-JP"
    383 	echo "ja_JP.SJIS SHIFT_JIS"
    384 	echo "ja_JP.Shift_JIS SHIFT_JIS"
    385 	echo "ko_KR.EUC EUC-KR"
    386 	;;
    387     netbsd*)
    388 	echo "646 ASCII"
    389 	echo "ISO8859-1 ISO-8859-1"
    390 	echo "ISO8859-2 ISO-8859-2"
    391 	echo "ISO8859-4 ISO-8859-4"
    392 	echo "ISO8859-5 ISO-8859-5"
    393 	echo "ISO8859-7 ISO-8859-7"
    394 	echo "ISO8859-13 ISO-8859-13"
    395 	echo "ISO8859-15 ISO-8859-15"
    396 	echo "eucCN GB2312"
    397 	echo "eucJP EUC-JP"
    398 	echo "eucKR EUC-KR"
    399 	echo "eucTW EUC-TW"
    400 	echo "BIG5 BIG5"
    401 	echo "SJIS SHIFT_JIS"
    402 	;;
    403     openbsd*)
    404 	echo "646 ASCII"
    405 	echo "ISO8859-1 ISO-8859-1"
    406 	echo "ISO8859-2 ISO-8859-2"
    407 	echo "ISO8859-4 ISO-8859-4"
    408 	echo "ISO8859-5 ISO-8859-5"
    409 	echo "ISO8859-7 ISO-8859-7"
    410 	echo "ISO8859-13 ISO-8859-13"
    411 	echo "ISO8859-15 ISO-8859-15"
    412 	;;
    413     darwin[56]*)
    414 	# Darwin 6.8 doesn't have nl_langinfo(CODESET); therefore
    415 	# localcharset.c falls back to using the full locale name
    416 	# from the environment variables.
    417 	echo "C ASCII"
    418 	for l in en_AU en_CA en_GB en_US la_LN; do
    419 	  echo "$l.US-ASCII ASCII"
    420 	done
    421 	for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \
    422 	         fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT nl_BE \
    423 	         nl_NL no_NO pt_PT sv_SE; do
    424 	  echo "$l ISO-8859-1"
    425 	  echo "$l.ISO8859-1 ISO-8859-1"
    426 	  echo "$l.ISO8859-15 ISO-8859-15"
    427 	done
    428 	for l in la_LN; do
    429 	  echo "$l.ISO8859-1 ISO-8859-1"
    430 	  echo "$l.ISO8859-15 ISO-8859-15"
    431 	done
    432 	for l in cs_CZ hr_HR hu_HU la_LN pl_PL sl_SI; do
    433 	  echo "$l.ISO8859-2 ISO-8859-2"
    434 	done
    435 	for l in la_LN lt_LT; do
    436 	  echo "$l.ISO8859-4 ISO-8859-4"
    437 	done
    438 	for l in ru_RU; do
    439 	  echo "$l.KOI8-R KOI8-R"
    440 	  echo "$l.ISO8859-5 ISO-8859-5"
    441 	  echo "$l.CP866 CP866"
    442 	done
    443 	for l in bg_BG; do
    444 	  echo "$l.CP1251 CP1251"
    445 	done
    446 	echo "uk_UA.KOI8-U KOI8-U"
    447 	echo "zh_TW.BIG5 BIG5"
    448 	echo "zh_TW.Big5 BIG5"
    449 	echo "zh_CN.EUC GB2312"
    450 	echo "ja_JP.EUC EUC-JP"
    451 	echo "ja_JP.SJIS SHIFT_JIS"
    452 	echo "ko_KR.EUC EUC-KR"
    453 	;;
    454     darwin*)
    455 	# Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
    456 	# useless:
    457 	# - It returns the empty string when LANG is set to a locale of the
    458 	#   form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
    459 	#   LC_CTYPE file.
    460 	# - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
    461 	#   the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
    462 	# - The documentation says:
    463 	#     "... all code that calls BSD system routines should ensure
    464 	#      that the const *char parameters of these routines are in UTF-8
    465 	#      encoding. All BSD system functions expect their string
    466 	#      parameters to be in UTF-8 encoding and nothing else."
    467 	#   It also says
    468 	#     "An additional caveat is that string parameters for files,
    469 	#      paths, and other file-system entities must be in canonical
    470 	#      UTF-8. In a canonical UTF-8 Unicode string, all decomposable
    471 	#      characters are decomposed ..."
    472 	#   but this is not true: You can pass non-decomposed UTF-8 strings
    473 	#   to file system functions, and it is the OS which will convert
    474 	#   them to decomposed UTF-8 before accessing the file system.
    475 	# - The Apple Terminal application displays UTF-8 by default.
    476 	# - However, other applications are free to use different encodings:
    477 	#   - xterm uses ISO-8859-1 by default.
    478 	#   - TextEdit uses MacRoman by default.
    479 	# We prefer UTF-8 over decomposed UTF-8-MAC because one should
    480 	# minimize the use of decomposed Unicode. Unfortunately, through the
    481 	# Darwin file system, decomposed UTF-8 strings are leaked into user
    482 	# space nevertheless.
    483 	# Then there are also the locales with encodings other than US-ASCII
    484 	# and UTF-8. These locales can be occasionally useful to users (e.g.
    485 	# when grepping through ISO-8859-1 encoded text files), when all their
    486 	# file names are in US-ASCII.
    487 	echo "ISO8859-1 ISO-8859-1"
    488 	echo "ISO8859-2 ISO-8859-2"
    489 	echo "ISO8859-4 ISO-8859-4"
    490 	echo "ISO8859-5 ISO-8859-5"
    491 	echo "ISO8859-7 ISO-8859-7"
    492 	echo "ISO8859-9 ISO-8859-9"
    493 	echo "ISO8859-13 ISO-8859-13"
    494 	echo "ISO8859-15 ISO-8859-15"
    495 	echo "KOI8-R KOI8-R"
    496 	echo "KOI8-U KOI8-U"
    497 	echo "CP866 CP866"
    498 	echo "CP949 CP949"
    499 	echo "CP1131 CP1131"
    500 	echo "CP1251 CP1251"
    501 	echo "eucCN GB2312"
    502 	echo "GB2312 GB2312"
    503 	echo "eucJP EUC-JP"
    504 	echo "eucKR EUC-KR"
    505 	echo "Big5 BIG5"
    506 	echo "Big5HKSCS BIG5-HKSCS"
    507 	echo "GBK GBK"
    508 	echo "GB18030 GB18030"
    509 	echo "SJIS SHIFT_JIS"
    510 	echo "ARMSCII-8 ARMSCII-8"
    511 	echo "PT154 PT154"
    512 	#echo "ISCII-DEV ?"
    513 	echo "* UTF-8"
    514 	;;
    515     beos* | haiku*)
    516 	# BeOS and Haiku have a single locale, and it has UTF-8 encoding.
    517 	echo "* UTF-8"
    518 	;;
    519     msdosdjgpp*)
    520 	# DJGPP 2.03 doesn't have nl_langinfo(CODESET); therefore
    521 	# localcharset.c falls back to using the full locale name
    522 	# from the environment variables.
    523 	echo "#"
    524 	echo "# The encodings given here may not all be correct."
    525 	echo "# If you find that the encoding given for your language and"
    526 	echo "# country is not the one your DOS machine actually uses, just"
    527 	echo "# correct it in this file, and send a mail to"
    528 	echo "# Juan Manuel Guerrero <juan.guerrero (at] gmx.de>"
    529 	echo "# and Bruno Haible <bruno (at] clisp.org>."
    530 	echo "#"
    531 	echo "C ASCII"
    532 	# ISO-8859-1 languages
    533 	echo "ca CP850"
    534 	echo "ca_ES CP850"
    535 	echo "da CP865"    # not CP850 ??
    536 	echo "da_DK CP865" # not CP850 ??
    537 	echo "de CP850"
    538 	echo "de_AT CP850"
    539 	echo "de_CH CP850"
    540 	echo "de_DE CP850"
    541 	echo "en CP850"
    542 	echo "en_AU CP850" # not CP437 ??
    543 	echo "en_CA CP850"
    544 	echo "en_GB CP850"
    545 	echo "en_NZ CP437"
    546 	echo "en_US CP437"
    547 	echo "en_ZA CP850" # not CP437 ??
    548 	echo "es CP850"
    549 	echo "es_AR CP850"
    550 	echo "es_BO CP850"
    551 	echo "es_CL CP850"
    552 	echo "es_CO CP850"
    553 	echo "es_CR CP850"
    554 	echo "es_CU CP850"
    555 	echo "es_DO CP850"
    556 	echo "es_EC CP850"
    557 	echo "es_ES CP850"
    558 	echo "es_GT CP850"
    559 	echo "es_HN CP850"
    560 	echo "es_MX CP850"
    561 	echo "es_NI CP850"
    562 	echo "es_PA CP850"
    563 	echo "es_PY CP850"
    564 	echo "es_PE CP850"
    565 	echo "es_SV CP850"
    566 	echo "es_UY CP850"
    567 	echo "es_VE CP850"
    568 	echo "et CP850"
    569 	echo "et_EE CP850"
    570 	echo "eu CP850"
    571 	echo "eu_ES CP850"
    572 	echo "fi CP850"
    573 	echo "fi_FI CP850"
    574 	echo "fr CP850"
    575 	echo "fr_BE CP850"
    576 	echo "fr_CA CP850"
    577 	echo "fr_CH CP850"
    578 	echo "fr_FR CP850"
    579 	echo "ga CP850"
    580 	echo "ga_IE CP850"
    581 	echo "gd CP850"
    582 	echo "gd_GB CP850"
    583 	echo "gl CP850"
    584 	echo "gl_ES CP850"
    585 	echo "id CP850"    # not CP437 ??
    586 	echo "id_ID CP850" # not CP437 ??
    587 	echo "is CP861"    # not CP850 ??
    588 	echo "is_IS CP861" # not CP850 ??
    589 	echo "it CP850"
    590 	echo "it_CH CP850"
    591 	echo "it_IT CP850"
    592 	echo "lt CP775"
    593 	echo "lt_LT CP775"
    594 	echo "lv CP775"
    595 	echo "lv_LV CP775"
    596 	echo "nb CP865"    # not CP850 ??
    597 	echo "nb_NO CP865" # not CP850 ??
    598 	echo "nl CP850"
    599 	echo "nl_BE CP850"
    600 	echo "nl_NL CP850"
    601 	echo "nn CP865"    # not CP850 ??
    602 	echo "nn_NO CP865" # not CP850 ??
    603 	echo "no CP865"    # not CP850 ??
    604 	echo "no_NO CP865" # not CP850 ??
    605 	echo "pt CP850"
    606 	echo "pt_BR CP850"
    607 	echo "pt_PT CP850"
    608 	echo "sv CP850"
    609 	echo "sv_SE CP850"
    610 	# ISO-8859-2 languages
    611 	echo "cs CP852"
    612 	echo "cs_CZ CP852"
    613 	echo "hr CP852"
    614 	echo "hr_HR CP852"
    615 	echo "hu CP852"
    616 	echo "hu_HU CP852"
    617 	echo "pl CP852"
    618 	echo "pl_PL CP852"
    619 	echo "ro CP852"
    620 	echo "ro_RO CP852"
    621 	echo "sk CP852"
    622 	echo "sk_SK CP852"
    623 	echo "sl CP852"
    624 	echo "sl_SI CP852"
    625 	echo "sq CP852"
    626 	echo "sq_AL CP852"
    627 	echo "sr CP852"    # CP852 or CP866 or CP855 ??
    628 	echo "sr_CS CP852" # CP852 or CP866 or CP855 ??
    629 	echo "sr_YU CP852" # CP852 or CP866 or CP855 ??
    630 	# ISO-8859-3 languages
    631 	echo "mt CP850"
    632 	echo "mt_MT CP850"
    633 	# ISO-8859-5 languages
    634 	echo "be CP866"
    635 	echo "be_BE CP866"
    636 	echo "bg CP866"    # not CP855 ??
    637 	echo "bg_BG CP866" # not CP855 ??
    638 	echo "mk CP866"    # not CP855 ??
    639 	echo "mk_MK CP866" # not CP855 ??
    640 	echo "ru CP866"
    641 	echo "ru_RU CP866"
    642 	echo "uk CP1125"
    643 	echo "uk_UA CP1125"
    644 	# ISO-8859-6 languages
    645 	echo "ar CP864"
    646 	echo "ar_AE CP864"
    647 	echo "ar_DZ CP864"
    648 	echo "ar_EG CP864"
    649 	echo "ar_IQ CP864"
    650 	echo "ar_IR CP864"
    651 	echo "ar_JO CP864"
    652 	echo "ar_KW CP864"
    653 	echo "ar_MA CP864"
    654 	echo "ar_OM CP864"
    655 	echo "ar_QA CP864"
    656 	echo "ar_SA CP864"
    657 	echo "ar_SY CP864"
    658 	# ISO-8859-7 languages
    659 	echo "el CP869"
    660 	echo "el_GR CP869"
    661 	# ISO-8859-8 languages
    662 	echo "he CP862"
    663 	echo "he_IL CP862"
    664 	# ISO-8859-9 languages
    665 	echo "tr CP857"
    666 	echo "tr_TR CP857"
    667 	# Japanese
    668 	echo "ja CP932"
    669 	echo "ja_JP CP932"
    670 	# Chinese
    671 	echo "zh_CN GBK"
    672 	echo "zh_TW CP950" # not CP938 ??
    673 	# Korean
    674 	echo "kr CP949"    # not CP934 ??
    675 	echo "kr_KR CP949" # not CP934 ??
    676 	# Thai
    677 	echo "th CP874"
    678 	echo "th_TH CP874"
    679 	# Other
    680 	echo "eo CP850"
    681 	echo "eo_EO CP850"
    682 	;;
    683 esac
    684