Home | History | Annotate | Download | only in Lib
      1 """Locale support module.
      2 
      3 The module provides low-level access to the C lib's locale APIs and adds high
      4 level number formatting APIs as well as a locale aliasing engine to complement
      5 these.
      6 
      7 The aliasing engine includes support for many commonly used locale names and
      8 maps them to values suitable for passing to the C lib's setlocale() function. It
      9 also includes default encodings for all supported locale names.
     10 
     11 """
     12 
     13 import sys
     14 import encodings
     15 import encodings.aliases
     16 import re
     17 import collections
     18 from builtins import str as _builtin_str
     19 import functools
     20 
     21 # Try importing the _locale module.
     22 #
     23 # If this fails, fall back on a basic 'C' locale emulation.
     24 
# Names exported by ``from locale import *``.
#
# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
# trying the import.  So __all__ is also fiddled at the end of the file.
# (That is why LC_MESSAGES is deliberately absent from the list below.)
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
     32 
     33 def _strcoll(a,b):
     34     """ strcoll(string,string) -> int.
     35         Compares two strings according to the locale.
     36     """
     37     return (a > b) - (a < b)
     38 
     39 def _strxfrm(s):
     40     """ strxfrm(string) -> string.
     41         Returns a string that behaves for cmp locale-aware.
     42     """
     43     return s
     44 
# Pull every public name from the C extension into this module's namespace.
# When the extension is missing, define a minimal stand-in that only knows
# the "C" locale.
try:

    from _locale import *

except ImportError:

    # Locale emulation

    # Fallback values for the constants the rest of this module expects.
    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    # Errors raised by the emulated setlocale() are plain ValueErrors.
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.

            This emulation always returns the same fixed set of values
            (a fresh dict on every call).
        """
        # 'C' locale default values
        # (the numeric entries use 127, i.e. the CHAR_MAX defined above)
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts only None, '' or 'C' as value and
            always returns 'C'; any other value raises Error.  The
            category argument is not inspected.
        """
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'
     94 
     95 # These may or may not exist in _locale, so be sure to set them.
     96 if 'strxfrm' not in globals():
     97     strxfrm = _strxfrm
     98 if 'strcoll' not in globals():
     99     strcoll = _strcoll
    100 
    101 
    102 _localeconv = localeconv
    103 
    104 # With this dict, you can override some items of localeconv's return value.
    105 # This is useful for testing purposes.
    106 _override_localeconv = {}
    107 
    108 @functools.wraps(_localeconv)
    109 def localeconv():
    110     d = _localeconv()
    111     if _override_localeconv:
    112         d.update(_override_localeconv)
    113     return d
    114 
    115 
    116 ### Number formatting APIs
    117 
    118 # Author: Martin von Loewis
    119 # improved by Georg Brandl
    120 
    121 # Iterate over grouping intervals
    122 def _grouping_intervals(grouping):
    123     last_interval = None
    124     for interval in grouping:
    125         # if grouping is -1, we are done
    126         if interval == CHAR_MAX:
    127             return
    128         # 0: re-use last group ad infinitum
    129         if interval == 0:
    130             if last_interval is None:
    131                 raise ValueError("invalid grouping")
    132             while True:
    133                 yield last_interval
    134         yield interval
    135         last_interval = interval
    136 
    137 #perform the grouping from right to left
    138 def _group(s, monetary=False):
    139     conv = localeconv()
    140     thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
    141     grouping = conv[monetary and 'mon_grouping' or 'grouping']
    142     if not grouping:
    143         return (s, 0)
    144     if s[-1] == ' ':
    145         stripped = s.rstrip()
    146         right_spaces = s[len(stripped):]
    147         s = stripped
    148     else:
    149         right_spaces = ''
    150     left_spaces = ''
    151     groups = []
    152     for interval in _grouping_intervals(grouping):
    153         if not s or s[-1] not in "0123456789":
    154             # only non-digit characters remain (sign, spaces)
    155             left_spaces = s
    156             s = ''
    157             break
    158         groups.append(s[-interval:])
    159         s = s[:-interval]
    160     if s:
    161         groups.append(s)
    162     groups.reverse()
    163     return (
    164         left_spaces + thousands_sep.join(groups) + right_spaces,
    165         len(thousands_sep) * (len(groups) - 1)
    166     )
    167 
    168 # Strip a given amount of excess padding from the given string
    169 def _strip_padding(s, amount):
    170     lpos = 0
    171     while amount and s[lpos] == ' ':
    172         lpos += 1
    173         amount -= 1
    174     rpos = len(s) - 1
    175     while amount and s[rpos] == ' ':
    176         rpos -= 1
    177         amount -= 1
    178     return s[lpos:rpos+1]
    179 
# Matches one printf-style conversion specifier: a '%', an optional
# parenthesised mapping key (group 'key'), an optional run of
# flag/width/precision/length characters (group 'modifiers'), and a final
# conversion character.  A literal '%%' matches as well.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
    182 
    183 def format(percent, value, grouping=False, monetary=False, *additional):
    184     """Returns the locale-aware substitution of a %? specifier
    185     (percent).
    186 
    187     additional is for format strings which contain one or more
    188     '*' modifiers."""
    189     # this is only for one-percent-specifier strings and this should be checked
    190     match = _percent_re.match(percent)
    191     if not match or len(match.group())!= len(percent):
    192         raise ValueError(("format() must be given exactly one %%char "
    193                          "format specifier, %s not valid") % repr(percent))
    194     return _format(percent, value, grouping, monetary, *additional)
    195 
    196 def _format(percent, value, grouping=False, monetary=False, *additional):
    197     if additional:
    198         formatted = percent % ((value,) + additional)
    199     else:
    200         formatted = percent % value
    201     # floats and decimal ints need special action!
    202     if percent[-1] in 'eEfFgG':
    203         seps = 0
    204         parts = formatted.split('.')
    205         if grouping:
    206             parts[0], seps = _group(parts[0], monetary=monetary)
    207         decimal_point = localeconv()[monetary and 'mon_decimal_point'
    208                                               or 'decimal_point']
    209         formatted = decimal_point.join(parts)
    210         if seps:
    211             formatted = _strip_padding(formatted, seps)
    212     elif percent[-1] in 'diu':
    213         seps = 0
    214         if grouping:
    215             formatted, seps = _group(formatted, monetary=monetary)
    216         if seps:
    217             formatted = _strip_padding(formatted, seps)
    218     return formatted
    219 
    220 def format_string(f, val, grouping=False):
    221     """Formats a string in the same way that the % formatting would use,
    222     but takes the current locale into account.
    223     Grouping is applied if the third parameter is true."""
    224     percents = list(_percent_re.finditer(f))
    225     new_f = _percent_re.sub('%s', f)
    226 
    227     if isinstance(val, collections.Mapping):
    228         new_val = []
    229         for perc in percents:
    230             if perc.group()[-1]=='%':
    231                 new_val.append('%')
    232             else:
    233                 new_val.append(format(perc.group(), val, grouping))
    234     else:
    235         if not isinstance(val, tuple):
    236             val = (val,)
    237         new_val = []
    238         i = 0
    239         for perc in percents:
    240             if perc.group()[-1]=='%':
    241                 new_val.append('%')
    242             else:
    243                 starcount = perc.group('modifiers').count('*')
    244                 new_val.append(_format(perc.group(),
    245                                       val[i],
    246                                       grouping,
    247                                       False,
    248                                       *val[i+1:i+1+starcount]))
    249                 i += (1 + starcount)
    250     val = tuple(new_val)
    251 
    252     return new_f % val
    253 
    254 def currency(val, symbol=True, grouping=False, international=False):
    255     """Formats val according to the currency settings
    256     in the current locale."""
    257     conv = localeconv()
    258 
    259     # check for illegal values
    260     digits = conv[international and 'int_frac_digits' or 'frac_digits']
    261     if digits == 127:
    262         raise ValueError("Currency formatting is not possible using "
    263                          "the 'C' locale.")
    264 
    265     s = format('%%.%if' % digits, abs(val), grouping, monetary=True)
    266     # '<' and '>' are markers if the sign must be inserted between symbol and value
    267     s = '<' + s + '>'
    268 
    269     if symbol:
    270         smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
    271         precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
    272         separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
    273 
    274         if precedes:
    275             s = smb + (separated and ' ' or '') + s
    276         else:
    277             s = s + (separated and ' ' or '') + smb
    278 
    279     sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
    280     sign = conv[val<0 and 'negative_sign' or 'positive_sign']
    281 
    282     if sign_pos == 0:
    283         s = '(' + s + ')'
    284     elif sign_pos == 1:
    285         s = sign + s
    286     elif sign_pos == 2:
    287         s = s + sign
    288     elif sign_pos == 3:
    289         s = s.replace('<', sign)
    290     elif sign_pos == 4:
    291         s = s.replace('>', sign)
    292     else:
    293         # the default if nothing specified;
    294         # this should be the most fitting sign position
    295         s = sign + s
    296 
    297     return s.replace('<', '').replace('>', '')
    298 
    299 def str(val):
    300     """Convert float to string, taking the locale into account."""
    301     return format("%.12g", val)
    302 
    303 def delocalize(string):
    304     "Parses a string as a normalized number according to the locale settings."
    305 
    306     conv = localeconv()
    307 
    308     #First, get rid of the grouping
    309     ts = conv['thousands_sep']
    310     if ts:
    311         string = string.replace(ts, '')
    312 
    313     #next, replace the decimal point with a dot
    314     dd = conv['decimal_point']
    315     if dd:
    316         string = string.replace(dd, '.')
    317     return string
    318 
    319 def atof(string, func=float):
    320     "Parses a string as a float according to the locale settings."
    321     return func(delocalize(string))
    322 
    323 def atoi(string):
    324     "Converts a string to an integer according to the locale settings."
    325     return int(delocalize(string))
    326 
    327 def _test():
    328     setlocale(LC_ALL, "")
    329     #do grouping
    330     s1 = format("%d", 123456789,1)
    331     print(s1, "is", atoi(s1))
    332     #standard formatting
    333     s1 = str(3.14)
    334     print(s1, "is", atof(s1))
    335 
    336 ### Locale name aliasing engine
    337 
    338 # Author: Marc-Andre Lemburg, mal (at] lemburg.com
    339 # Various tweaks by Fredrik Lundh <fredrik (at] pythonware.com>
    340 
# Keep a reference to the low-level setlocale() under a private name: the
# public name ``setlocale`` is redefined further down in this module, and
# the wrappers there call through to this one.
_setlocale = setlocale
    344 
    345 def _replace_encoding(code, encoding):
    346     if '.' in code:
    347         langname = code[:code.index('.')]
    348     else:
    349         langname = code
    350     # Convert the encoding to a C lib compatible encoding string
    351     norm_encoding = encodings.normalize_encoding(encoding)
    352     #print('norm encoding: %r' % norm_encoding)
    353     norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
    354                                                   norm_encoding)
    355     #print('aliased encoding: %r' % norm_encoding)
    356     encoding = norm_encoding
    357     norm_encoding = norm_encoding.lower()
    358     if norm_encoding in locale_encoding_alias:
    359         encoding = locale_encoding_alias[norm_encoding]
    360     else:
    361         norm_encoding = norm_encoding.replace('_', '')
    362         norm_encoding = norm_encoding.replace('-', '')
    363         if norm_encoding in locale_encoding_alias:
    364             encoding = locale_encoding_alias[norm_encoding]
    365     #print('found encoding %r' % encoding)
    366     return langname + '.' + encoding
    367 
    368 def _append_modifier(code, modifier):
    369     if modifier == 'euro':
    370         if '.' not in code:
    371             return code + '.ISO8859-15'
    372         _, _, encoding = code.partition('.')
    373         if encoding in ('ISO8859-15', 'UTF-8'):
    374             return code
    375         if encoding == 'ISO8859-1':
    376             return _replace_encoding(code, 'ISO8859-15')
    377     return code + '@' + modifier
    378 
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        The name is lowercased and split into language, encoding and
        modifier; the locale_alias table is then consulted with
        progressively less specific keys (see the numbered lookups
        below).

        If normalization fails, the original name is returned
        unchanged.

        If an alias is found only for the name without its encoding,
        the given encoding is re-attached to the aliased code via
        _replace_encoding().

    """
    # Normalize the locale name and extract the encoding and modifier
    code = localename.lower()
    if ':' in code:
        # ':' is sometimes used as encoding delimiter.
        code = code.replace(':', '.')
    if '@' in code:
        code, modifier = code.split('@', 1)
    else:
        modifier = ''
    if '.' in code:
        # Only the first two dot-separated fields are used.
        langname, encoding = code.split('.')[:2]
    else:
        langname = code
        encoding = ''

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        # Alias keys carry the encoding without '-' and '_'.
        norm_encoding = encoding.replace('-', '')
        norm_encoding = norm_encoding.replace('_', '')
        lang_enc += '.' + norm_encoding
    lookup_name = lang_enc
    if modifier:
        lookup_name += '@' + modifier
    code = locale_alias.get(lookup_name, None)
    if code is not None:
        return code
    #print('first lookup failed')

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        code = locale_alias.get(lang_enc, None)
        if code is not None:
            #print('lookup without modifier succeeded')
            if '@' not in code:
                return _append_modifier(code, modifier)
            # The alias already has a modifier: accept it only when it
            # is the one that was asked for.
            if code.split('@', 1)[1].lower() == modifier:
                return code
        #print('second lookup failed')

    if encoding:
        # Third try: langname (without encoding, possibly with modifier)
        lookup_name = langname
        if modifier:
            lookup_name += '@' + modifier
        code = locale_alias.get(lookup_name, None)
        if code is not None:
            #print('lookup without encoding succeeded')
            if '@' not in code:
                return _replace_encoding(code, encoding)
            # Swap the encoding in front of the alias's own modifier.
            code, modifier = code.split('@', 1)
            return _replace_encoding(code, encoding) + '@' + modifier

        if modifier:
            # Fourth try: langname (without encoding and modifier)
            code = locale_alias.get(langname, None)
            if code is not None:
                #print('lookup without modifier and encoding succeeded')
                if '@' not in code:
                    code = _replace_encoding(code, encoding)
                    return _append_modifier(code, modifier)
                # Alias with a default modifier: usable only when it
                # matches the requested one.
                code, defmod = code.split('@', 1)
                if defmod.lower() == modifier:
                    return _replace_encoding(code, encoding) + '@' + defmod

    # Nothing matched: hand the caller's name back untouched.
    return localename
    461 
    462 def _parse_localename(localename):
    463 
    464     """ Parses the locale code for localename and returns the
    465         result as tuple (language code, encoding).
    466 
    467         The localename is normalized and passed through the locale
    468         alias engine. A ValueError is raised in case the locale name
    469         cannot be parsed.
    470 
    471         The language code corresponds to RFC 1766.  code and encoding
    472         can be None in case the values cannot be determined or are
    473         unknown to this implementation.
    474 
    475     """
    476     code = normalize(localename)
    477     if '@' in code:
    478         # Deal with locale modifiers
    479         code, modifier = code.split('@', 1)
    480         if modifier == 'euro' and '.' not in code:
    481             # Assume Latin-9 for @euro locales. This is bogus,
    482             # since some systems may use other encodings for these
    483             # locales. Also, we ignore other modifiers.
    484             return code, 'iso-8859-15'
    485 
    486     if '.' in code:
    487         return tuple(code.split('.')[:2])
    488     elif code == 'C':
    489         return None, None
    490     raise ValueError('unknown locale: %s' % localename)
    491 
    492 def _build_localename(localetuple):
    493 
    494     """ Builds a locale code from the given tuple (language code,
    495         encoding).
    496 
    497         No aliasing or normalizing takes place.
    498 
    499     """
    500     try:
    501         language, encoding = localetuple
    502 
    503         if language is None:
    504             language = 'C'
    505         if encoding is None:
    506             return language
    507         else:
    508             return language + '.' + encoding
    509     except (TypeError, ValueError):
    510         raise TypeError('Locale must be None, a string, or an iterable of two strings -- language code, encoding.')
    511 
    512 def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
    513 
    514     """ Tries to determine the default locale settings and returns
    515         them as tuple (language code, encoding).
    516 
    517         According to POSIX, a program which has not called
    518         setlocale(LC_ALL, "") runs using the portable 'C' locale.
    519         Calling setlocale(LC_ALL, "") lets it use the default locale as
    520         defined by the LANG variable. Since we don't want to interfere
    521         with the current locale setting we thus emulate the behavior
    522         in the way described above.
    523 
    524         To maintain compatibility with other platforms, not only the
    525         LANG variable is tested, but a list of variables given as
    526         envvars parameter. The first found to be defined will be
    527         used. envvars defaults to the search path used in GNU gettext;
    528         it must always contain the variable name 'LANG'.
    529 
    530         Except for the code 'C', the language code corresponds to RFC
    531         1766.  code and encoding can be None in case the values cannot
    532         be determined.
    533 
    534     """
    535 
    536     try:
    537         # check if it's supported by the _locale module
    538         import _locale
    539         code, encoding = _locale._getdefaultlocale()
    540     except (ImportError, AttributeError):
    541         pass
    542     else:
    543         # make sure the code/encoding values are valid
    544         if sys.platform == "win32" and code and code[:2] == "0x":
    545             # map windows language identifier to language name
    546             code = windows_locale.get(int(code, 0))
    547         # ...add other platform-specific processing here, if
    548         # necessary...
    549         return code, encoding
    550 
    551     # fall back on POSIX behaviour
    552     import os
    553     lookup = os.environ.get
    554     for variable in envvars:
    555         localename = lookup(variable,None)
    556         if localename:
    557             if variable == 'LANGUAGE':
    558                 localename = localename.split(':')[0]
    559             break
    560     else:
    561         localename = 'C'
    562     return _parse_localename(localename)
    563 
    564 
    565 def getlocale(category=LC_CTYPE):
    566 
    567     """ Returns the current setting for the given locale category as
    568         tuple (language code, encoding).
    569 
    570         category may be one of the LC_* value except LC_ALL. It
    571         defaults to LC_CTYPE.
    572 
    573         Except for the code 'C', the language code corresponds to RFC
    574         1766.  code and encoding can be None in case the values cannot
    575         be determined.
    576 
    577     """
    578     localename = _setlocale(category)
    579     if category == LC_ALL and ';' in localename:
    580         raise TypeError('category LC_ALL is not supported')
    581     return _parse_localename(localename)
    582 
    583 def setlocale(category, locale=None):
    584 
    585     """ Set the locale for the given category.  The locale can be
    586         a string, an iterable of two strings (language code and encoding),
    587         or None.
    588 
    589         Iterables are converted to strings using the locale aliasing
    590         engine.  Locale strings are passed directly to the C lib.
    591 
    592         category may be given as one of the LC_* values.
    593 
    594     """
    595     if locale and not isinstance(locale, _builtin_str):
    596         # convert to string
    597         locale = normalize(_build_localename(locale))
    598     return _setlocale(category, locale)
    599 
    600 def resetlocale(category=LC_ALL):
    601 
    602     """ Sets the locale for category to the default setting.
    603 
    604         The default setting is determined by calling
    605         getdefaultlocale(). category defaults to LC_ALL.
    606 
    607     """
    608     _setlocale(category, _build_localename(getdefaultlocale()))
    609 
    610 if sys.platform.startswith("win"):
    611     # On Win32, this will return the ANSI code page
    612     def getpreferredencoding(do_setlocale = True):
    613         """Return the charset that the user is likely using."""
    614         import _bootlocale
    615         return _bootlocale.getpreferredencoding(False)
    616 else:
    617     # On Unix, if CODESET is available, use that.
    618     try:
    619         CODESET
    620     except NameError:
    621         # Fall back to parsing environment variables :-(
    622         def getpreferredencoding(do_setlocale = True):
    623             """Return the charset that the user is likely using,
    624             by looking at environment variables."""
    625             res = getdefaultlocale()[1]
    626             if res is None:
    627                 # LANG not set, default conservatively to ASCII
    628                 res = 'ascii'
    629             return res
    630     else:
    631         def getpreferredencoding(do_setlocale = True):
    632             """Return the charset that the user is likely using,
    633             according to the system configuration."""
    634             import _bootlocale
    635             if do_setlocale:
    636                 oldloc = setlocale(LC_CTYPE)
    637                 try:
    638                     setlocale(LC_CTYPE, "")
    639                 except Error:
    640                     pass
    641             result = _bootlocale.getpreferredencoding(False)
    642             if do_setlocale:
    643                 setlocale(LC_CTYPE, oldloc)
    644             return result
    645 
    646 
    647 ### Database
    648 #
    649 # The following data was extracted from the locale.alias file which
    650 # comes with X11 and then hand edited removing the explicit encoding
    651 # definitions and adding some more aliases. The file is usually
    652 # available as /usr/lib/X11/locale/locale.alias.
    653 #
    654 
#
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). _replace_encoding()
# first looks up the encoding in the encodings.aliases dictionary and
# then applies this mapping to find the correct C lib name for the
# encoding.
#
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437':                          'C',
    'c':                            'C',
    'en':                           'ISO8859-1',
    'jis':                          'JIS7',
    'jis7':                         'JIS7',
    'ajec':                         'eucJP',
    'koi8c':                        'KOI8-C',
    'microsoftcp1251':              'CP1251',
    'microsoftcp1255':              'CP1255',
    'microsoftcp1256':              'CP1256',
    '88591':                        'ISO8859-1',
    '88592':                        'ISO8859-2',
    '88595':                        'ISO8859-5',
    '885915':                       'ISO8859-15',

    # Mappings from Python codec names to C lib encoding names
    'ascii':                        'ISO8859-1',
    'latin_1':                      'ISO8859-1',
    'iso8859_1':                    'ISO8859-1',
    'iso8859_10':                   'ISO8859-10',
    'iso8859_11':                   'ISO8859-11',
    'iso8859_13':                   'ISO8859-13',
    'iso8859_14':                   'ISO8859-14',
    'iso8859_15':                   'ISO8859-15',
    'iso8859_16':                   'ISO8859-16',
    'iso8859_2':                    'ISO8859-2',
    'iso8859_3':                    'ISO8859-3',
    'iso8859_4':                    'ISO8859-4',
    'iso8859_5':                    'ISO8859-5',
    'iso8859_6':                    'ISO8859-6',
    'iso8859_7':                    'ISO8859-7',
    'iso8859_8':                    'ISO8859-8',
    'iso8859_9':                    'ISO8859-9',
    'iso2022_jp':                   'JIS7',
    'shift_jis':                    'SJIS',
    'tactis':                       'TACTIS',
    'euc_jp':                       'eucJP',
    'euc_kr':                       'eucKR',
    'utf_8':                        'UTF-8',
    'koi8_r':                       'KOI8-R',
    'koi8_t':                       'KOI8-T',
    'koi8_u':                       'KOI8-U',
    'kz1048':                       'RK1048',
    'cp1251':                       'CP1251',
    'cp1255':                       'CP1255',
    'cp1256':                       'CP1256',

    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}
    715 
# Also register every alias under its underscore-free spelling.  sorted()
# works on a snapshot of the items, so the dict can be extended inside the
# loop, and setdefault() never replaces an entry that is already present.
for k, v in sorted(locale_encoding_alias.items()):
    k = k.replace('_', '')
    locale_encoding_alias.setdefault(k, v)
    719 
    720 #
    721 # The locale_alias table maps lowercase alias names to C locale names
    722 # (case-sensitive). Encodings are always separated from the locale
    723 # name using a dot ('.'); they should only be given in case the
    724 # language name is needed to interpret the given encoding alias
    725 # correctly (CJK codes often have this need).
    726 #
# Note that the normalize() function, which uses this table,
# removes '_' and '-' characters from the encoding part of the
# locale name before doing the lookup. This saves a lot of
# space in the table.
    731 #
    732 # MAL 2004-12-10:
    733 # Updated alias mapping to most recent locale.alias file
    734 # from X.org distribution using makelocalealias.py.
    735 #
    736 # These are the differences compared to the old mapping (Python 2.4
    737 # and older):
    738 #
    739 #    updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
    740 #    updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
    741 #    updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
    742 #    updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
    743 #    updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
    744 #    updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
    745 #    updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
    746 #    updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
    747 #    updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
    748 #    updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
    749 #    updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
    750 #    updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
    751 #    updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
    752 #    updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
    753 #    updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
    754 #    updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
    755 #    updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
    756 #    updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
    757 #    updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
    758 #    updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
    759 #    updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
    760 #    updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
    761 #
    762 # MAL 2008-05-30:
    763 # Updated alias mapping to most recent locale.alias file
    764 # from X.org distribution using makelocalealias.py.
    765 #
    766 # These are the differences compared to the old mapping (Python 2.5
    767 # and older):
    768 #
    769 #    updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
    770 #    updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
    771 #    updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
    772 #    updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
    773 #    updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
    774 #    updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
    775 #    updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
    776 #    updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
    777 #    updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
    778 #    updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
    779 #    updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
    780 #    updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
    781 #    updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
    782 #    updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
    783 #    updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
    784 #    updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
    785 #    updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
    786 #    updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
    787 #    updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
    788 #
    789 # AP 2010-04-12:
    790 # Updated alias mapping to most recent locale.alias file
    791 # from X.org distribution using makelocalealias.py.
    792 #
    793 # These are the differences compared to the old mapping (Python 2.6.5
    794 # and older):
    795 #
    796 #    updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
    797 #    updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
    798 #    updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
    799 #    updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
    800 #    updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
    801 #    updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
    802 #    updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
    803 #    updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
    804 #    updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
    805 #    updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
    806 #    updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
    807 #    updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
    808 #    updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
    809 #
    810 # SS 2013-12-20:
    811 # Updated alias mapping to most recent locale.alias file
    812 # from X.org distribution using makelocalealias.py.
    813 #
    814 # These are the differences compared to the old mapping (Python 3.3.3
    815 # and older):
    816 #
    817 #    updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
    818 #    updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
    819 #    updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
    820 #    updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
    821 #    updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
    822 #    updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
    823 #    updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
    824 #    updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
    825 #    updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
    826 #    updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
    827 #    updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
    828 #
    829 # SS 2014-10-01:
    830 # Updated alias mapping with glibc 2.19 supported locales.
    831 
    832 locale_alias = {
    833     'a3':                                   'az_AZ.KOI8-C',
    834     'a3_az':                                'az_AZ.KOI8-C',
    835     'a3_az.koic':                           'az_AZ.KOI8-C',
    836     'aa_dj':                                'aa_DJ.ISO8859-1',
    837     'aa_er':                                'aa_ER.UTF-8',
    838     'aa_et':                                'aa_ET.UTF-8',
    839     'af':                                   'af_ZA.ISO8859-1',
    840     'af_za':                                'af_ZA.ISO8859-1',
    841     'am':                                   'am_ET.UTF-8',
    842     'am_et':                                'am_ET.UTF-8',
    843     'american':                             'en_US.ISO8859-1',
    844     'an_es':                                'an_ES.ISO8859-15',
    845     'ar':                                   'ar_AA.ISO8859-6',
    846     'ar_aa':                                'ar_AA.ISO8859-6',
    847     'ar_ae':                                'ar_AE.ISO8859-6',
    848     'ar_bh':                                'ar_BH.ISO8859-6',
    849     'ar_dz':                                'ar_DZ.ISO8859-6',
    850     'ar_eg':                                'ar_EG.ISO8859-6',
    851     'ar_in':                                'ar_IN.UTF-8',
    852     'ar_iq':                                'ar_IQ.ISO8859-6',
    853     'ar_jo':                                'ar_JO.ISO8859-6',
    854     'ar_kw':                                'ar_KW.ISO8859-6',
    855     'ar_lb':                                'ar_LB.ISO8859-6',
    856     'ar_ly':                                'ar_LY.ISO8859-6',
    857     'ar_ma':                                'ar_MA.ISO8859-6',
    858     'ar_om':                                'ar_OM.ISO8859-6',
    859     'ar_qa':                                'ar_QA.ISO8859-6',
    860     'ar_sa':                                'ar_SA.ISO8859-6',
    861     'ar_sd':                                'ar_SD.ISO8859-6',
    862     'ar_sy':                                'ar_SY.ISO8859-6',
    863     'ar_tn':                                'ar_TN.ISO8859-6',
    864     'ar_ye':                                'ar_YE.ISO8859-6',
    865     'arabic':                               'ar_AA.ISO8859-6',
    866     'as':                                   'as_IN.UTF-8',
    867     'as_in':                                'as_IN.UTF-8',
    868     'ast_es':                               'ast_ES.ISO8859-15',
    869     'ayc_pe':                               'ayc_PE.UTF-8',
    870     'az':                                   'az_AZ.ISO8859-9E',
    871     'az_az':                                'az_AZ.ISO8859-9E',
    872     'az_az.iso88599e':                      'az_AZ.ISO8859-9E',
    873     'be':                                   'be_BY.CP1251',
    874     'be@latin':                             'be_BY.UTF-8@latin',
    875     'be_bg.utf8':                           'bg_BG.UTF-8',
    876     'be_by':                                'be_BY.CP1251',
    877     'be_by@latin':                          'be_BY.UTF-8@latin',
    878     'bem_zm':                               'bem_ZM.UTF-8',
    879     'ber_dz':                               'ber_DZ.UTF-8',
    880     'ber_ma':                               'ber_MA.UTF-8',
    881     'bg':                                   'bg_BG.CP1251',
    882     'bg_bg':                                'bg_BG.CP1251',
    883     'bho_in':                               'bho_IN.UTF-8',
    884     'bn_bd':                                'bn_BD.UTF-8',
    885     'bn_in':                                'bn_IN.UTF-8',
    886     'bo_cn':                                'bo_CN.UTF-8',
    887     'bo_in':                                'bo_IN.UTF-8',
    888     'bokmal':                               'nb_NO.ISO8859-1',
    889     'bokm\xe5l':                            'nb_NO.ISO8859-1',
    890     'br':                                   'br_FR.ISO8859-1',
    891     'br_fr':                                'br_FR.ISO8859-1',
    892     'brx_in':                               'brx_IN.UTF-8',
    893     'bs':                                   'bs_BA.ISO8859-2',
    894     'bs_ba':                                'bs_BA.ISO8859-2',
    895     'bulgarian':                            'bg_BG.CP1251',
    896     'byn_er':                               'byn_ER.UTF-8',
    897     'c':                                    'C',
    898     'c-french':                             'fr_CA.ISO8859-1',
    899     'c.ascii':                              'C',
    900     'c.en':                                 'C',
    901     'c.iso88591':                           'en_US.ISO8859-1',
    902     'c.utf8':                               'en_US.UTF-8',
    903     'c_c':                                  'C',
    904     'c_c.c':                                'C',
    905     'ca':                                   'ca_ES.ISO8859-1',
    906     'ca_ad':                                'ca_AD.ISO8859-1',
    907     'ca_es':                                'ca_ES.ISO8859-1',
    908     'ca_es@valencia':                       'ca_ES.ISO8859-15@valencia',
    909     'ca_fr':                                'ca_FR.ISO8859-1',
    910     'ca_it':                                'ca_IT.ISO8859-1',
    911     'catalan':                              'ca_ES.ISO8859-1',
    912     'cextend':                              'en_US.ISO8859-1',
    913     'chinese-s':                            'zh_CN.eucCN',
    914     'chinese-t':                            'zh_TW.eucTW',
    915     'crh_ua':                               'crh_UA.UTF-8',
    916     'croatian':                             'hr_HR.ISO8859-2',
    917     'cs':                                   'cs_CZ.ISO8859-2',
    918     'cs_cs':                                'cs_CZ.ISO8859-2',
    919     'cs_cz':                                'cs_CZ.ISO8859-2',
    920     'csb_pl':                               'csb_PL.UTF-8',
    921     'cv_ru':                                'cv_RU.UTF-8',
    922     'cy':                                   'cy_GB.ISO8859-1',
    923     'cy_gb':                                'cy_GB.ISO8859-1',
    924     'cz':                                   'cs_CZ.ISO8859-2',
    925     'cz_cz':                                'cs_CZ.ISO8859-2',
    926     'czech':                                'cs_CZ.ISO8859-2',
    927     'da':                                   'da_DK.ISO8859-1',
    928     'da_dk':                                'da_DK.ISO8859-1',
    929     'danish':                               'da_DK.ISO8859-1',
    930     'dansk':                                'da_DK.ISO8859-1',
    931     'de':                                   'de_DE.ISO8859-1',
    932     'de_at':                                'de_AT.ISO8859-1',
    933     'de_be':                                'de_BE.ISO8859-1',
    934     'de_ch':                                'de_CH.ISO8859-1',
    935     'de_de':                                'de_DE.ISO8859-1',
    936     'de_li.utf8':                           'de_LI.UTF-8',
    937     'de_lu':                                'de_LU.ISO8859-1',
    938     'deutsch':                              'de_DE.ISO8859-1',
    939     'doi_in':                               'doi_IN.UTF-8',
    940     'dutch':                                'nl_NL.ISO8859-1',
    941     'dutch.iso88591':                       'nl_BE.ISO8859-1',
    942     'dv_mv':                                'dv_MV.UTF-8',
    943     'dz_bt':                                'dz_BT.UTF-8',
    944     'ee':                                   'ee_EE.ISO8859-4',
    945     'ee_ee':                                'ee_EE.ISO8859-4',
    946     'eesti':                                'et_EE.ISO8859-1',
    947     'el':                                   'el_GR.ISO8859-7',
    948     'el_cy':                                'el_CY.ISO8859-7',
    949     'el_gr':                                'el_GR.ISO8859-7',
    950     'el_gr@euro':                           'el_GR.ISO8859-15',
    951     'en':                                   'en_US.ISO8859-1',
    952     'en_ag':                                'en_AG.UTF-8',
    953     'en_au':                                'en_AU.ISO8859-1',
    954     'en_be':                                'en_BE.ISO8859-1',
    955     'en_bw':                                'en_BW.ISO8859-1',
    956     'en_ca':                                'en_CA.ISO8859-1',
    957     'en_dk':                                'en_DK.ISO8859-1',
    958     'en_dl.utf8':                           'en_DL.UTF-8',
    959     'en_gb':                                'en_GB.ISO8859-1',
    960     'en_hk':                                'en_HK.ISO8859-1',
    961     'en_ie':                                'en_IE.ISO8859-1',
    962     'en_in':                                'en_IN.ISO8859-1',
    963     'en_ng':                                'en_NG.UTF-8',
    964     'en_nz':                                'en_NZ.ISO8859-1',
    965     'en_ph':                                'en_PH.ISO8859-1',
    966     'en_sg':                                'en_SG.ISO8859-1',
    967     'en_uk':                                'en_GB.ISO8859-1',
    968     'en_us':                                'en_US.ISO8859-1',
    969     'en_us@euro@euro':                      'en_US.ISO8859-15',
    970     'en_za':                                'en_ZA.ISO8859-1',
    971     'en_zm':                                'en_ZM.UTF-8',
    972     'en_zw':                                'en_ZW.ISO8859-1',
    973     'en_zw.utf8':                           'en_ZS.UTF-8',
    974     'eng_gb':                               'en_GB.ISO8859-1',
    975     'english':                              'en_EN.ISO8859-1',
    976     'english_uk':                           'en_GB.ISO8859-1',
    977     'english_united-states':                'en_US.ISO8859-1',
    978     'english_united-states.437':            'C',
    979     'english_us':                           'en_US.ISO8859-1',
    980     'eo':                                   'eo_XX.ISO8859-3',
    981     'eo.utf8':                              'eo.UTF-8',
    982     'eo_eo':                                'eo_EO.ISO8859-3',
    983     'eo_us.utf8':                           'eo_US.UTF-8',
    984     'eo_xx':                                'eo_XX.ISO8859-3',
    985     'es':                                   'es_ES.ISO8859-1',
    986     'es_ar':                                'es_AR.ISO8859-1',
    987     'es_bo':                                'es_BO.ISO8859-1',
    988     'es_cl':                                'es_CL.ISO8859-1',
    989     'es_co':                                'es_CO.ISO8859-1',
    990     'es_cr':                                'es_CR.ISO8859-1',
    991     'es_cu':                                'es_CU.UTF-8',
    992     'es_do':                                'es_DO.ISO8859-1',
    993     'es_ec':                                'es_EC.ISO8859-1',
    994     'es_es':                                'es_ES.ISO8859-1',
    995     'es_gt':                                'es_GT.ISO8859-1',
    996     'es_hn':                                'es_HN.ISO8859-1',
    997     'es_mx':                                'es_MX.ISO8859-1',
    998     'es_ni':                                'es_NI.ISO8859-1',
    999     'es_pa':                                'es_PA.ISO8859-1',
   1000     'es_pe':                                'es_PE.ISO8859-1',
   1001     'es_pr':                                'es_PR.ISO8859-1',
   1002     'es_py':                                'es_PY.ISO8859-1',
   1003     'es_sv':                                'es_SV.ISO8859-1',
   1004     'es_us':                                'es_US.ISO8859-1',
   1005     'es_uy':                                'es_UY.ISO8859-1',
   1006     'es_ve':                                'es_VE.ISO8859-1',
   1007     'estonian':                             'et_EE.ISO8859-1',
   1008     'et':                                   'et_EE.ISO8859-15',
   1009     'et_ee':                                'et_EE.ISO8859-15',
   1010     'eu':                                   'eu_ES.ISO8859-1',
   1011     'eu_es':                                'eu_ES.ISO8859-1',
   1012     'eu_fr':                                'eu_FR.ISO8859-1',
   1013     'fa':                                   'fa_IR.UTF-8',
   1014     'fa_ir':                                'fa_IR.UTF-8',
   1015     'fa_ir.isiri3342':                      'fa_IR.ISIRI-3342',
   1016     'ff_sn':                                'ff_SN.UTF-8',
   1017     'fi':                                   'fi_FI.ISO8859-15',
   1018     'fi_fi':                                'fi_FI.ISO8859-15',
   1019     'fil_ph':                               'fil_PH.UTF-8',
   1020     'finnish':                              'fi_FI.ISO8859-1',
   1021     'fo':                                   'fo_FO.ISO8859-1',
   1022     'fo_fo':                                'fo_FO.ISO8859-1',
   1023     'fr':                                   'fr_FR.ISO8859-1',
   1024     'fr_be':                                'fr_BE.ISO8859-1',
   1025     'fr_ca':                                'fr_CA.ISO8859-1',
   1026     'fr_ch':                                'fr_CH.ISO8859-1',
   1027     'fr_fr':                                'fr_FR.ISO8859-1',
   1028     'fr_lu':                                'fr_LU.ISO8859-1',
   1029     'fran\xe7ais':                          'fr_FR.ISO8859-1',
   1030     'fre_fr':                               'fr_FR.ISO8859-1',
   1031     'french':                               'fr_FR.ISO8859-1',
   1032     'french.iso88591':                      'fr_CH.ISO8859-1',
   1033     'french_france':                        'fr_FR.ISO8859-1',
   1034     'fur_it':                               'fur_IT.UTF-8',
   1035     'fy_de':                                'fy_DE.UTF-8',
   1036     'fy_nl':                                'fy_NL.UTF-8',
   1037     'ga':                                   'ga_IE.ISO8859-1',
   1038     'ga_ie':                                'ga_IE.ISO8859-1',
   1039     'galego':                               'gl_ES.ISO8859-1',
   1040     'galician':                             'gl_ES.ISO8859-1',
   1041     'gd':                                   'gd_GB.ISO8859-1',
   1042     'gd_gb':                                'gd_GB.ISO8859-1',
   1043     'ger_de':                               'de_DE.ISO8859-1',
   1044     'german':                               'de_DE.ISO8859-1',
   1045     'german.iso88591':                      'de_CH.ISO8859-1',
   1046     'german_germany':                       'de_DE.ISO8859-1',
   1047     'gez_er':                               'gez_ER.UTF-8',
   1048     'gez_et':                               'gez_ET.UTF-8',
   1049     'gl':                                   'gl_ES.ISO8859-1',
   1050     'gl_es':                                'gl_ES.ISO8859-1',
   1051     'greek':                                'el_GR.ISO8859-7',
   1052     'gu_in':                                'gu_IN.UTF-8',
   1053     'gv':                                   'gv_GB.ISO8859-1',
   1054     'gv_gb':                                'gv_GB.ISO8859-1',
   1055     'ha_ng':                                'ha_NG.UTF-8',
   1056     'he':                                   'he_IL.ISO8859-8',
   1057     'he_il':                                'he_IL.ISO8859-8',
   1058     'hebrew':                               'he_IL.ISO8859-8',
   1059     'hi':                                   'hi_IN.ISCII-DEV',
   1060     'hi_in':                                'hi_IN.ISCII-DEV',
   1061     'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
   1062     'hne':                                  'hne_IN.UTF-8',
   1063     'hne_in':                               'hne_IN.UTF-8',
   1064     'hr':                                   'hr_HR.ISO8859-2',
   1065     'hr_hr':                                'hr_HR.ISO8859-2',
   1066     'hrvatski':                             'hr_HR.ISO8859-2',
   1067     'hsb_de':                               'hsb_DE.ISO8859-2',
   1068     'ht_ht':                                'ht_HT.UTF-8',
   1069     'hu':                                   'hu_HU.ISO8859-2',
   1070     'hu_hu':                                'hu_HU.ISO8859-2',
   1071     'hungarian':                            'hu_HU.ISO8859-2',
   1072     'hy_am':                                'hy_AM.UTF-8',
   1073     'hy_am.armscii8':                       'hy_AM.ARMSCII_8',
   1074     'ia':                                   'ia.UTF-8',
   1075     'ia_fr':                                'ia_FR.UTF-8',
   1076     'icelandic':                            'is_IS.ISO8859-1',
   1077     'id':                                   'id_ID.ISO8859-1',
   1078     'id_id':                                'id_ID.ISO8859-1',
   1079     'ig_ng':                                'ig_NG.UTF-8',
   1080     'ik_ca':                                'ik_CA.UTF-8',
   1081     'in':                                   'id_ID.ISO8859-1',
   1082     'in_id':                                'id_ID.ISO8859-1',
   1083     'is':                                   'is_IS.ISO8859-1',
   1084     'is_is':                                'is_IS.ISO8859-1',
   1085     'iso-8859-1':                           'en_US.ISO8859-1',
   1086     'iso-8859-15':                          'en_US.ISO8859-15',
   1087     'iso8859-1':                            'en_US.ISO8859-1',
   1088     'iso8859-15':                           'en_US.ISO8859-15',
   1089     'iso_8859_1':                           'en_US.ISO8859-1',
   1090     'iso_8859_15':                          'en_US.ISO8859-15',
   1091     'it':                                   'it_IT.ISO8859-1',
   1092     'it_ch':                                'it_CH.ISO8859-1',
   1093     'it_it':                                'it_IT.ISO8859-1',
   1094     'italian':                              'it_IT.ISO8859-1',
   1095     'iu':                                   'iu_CA.NUNACOM-8',
   1096     'iu_ca':                                'iu_CA.NUNACOM-8',
   1097     'iu_ca.nunacom8':                       'iu_CA.NUNACOM-8',
   1098     'iw':                                   'he_IL.ISO8859-8',
   1099     'iw_il':                                'he_IL.ISO8859-8',
   1100     'iw_il.utf8':                           'iw_IL.UTF-8',
   1101     'ja':                                   'ja_JP.eucJP',
   1102     'ja_jp':                                'ja_JP.eucJP',
   1103     'ja_jp.euc':                            'ja_JP.eucJP',
   1104     'ja_jp.mscode':                         'ja_JP.SJIS',
   1105     'ja_jp.pck':                            'ja_JP.SJIS',
   1106     'japan':                                'ja_JP.eucJP',
   1107     'japanese':                             'ja_JP.eucJP',
   1108     'japanese-euc':                         'ja_JP.eucJP',
   1109     'japanese.euc':                         'ja_JP.eucJP',
   1110     'jp_jp':                                'ja_JP.eucJP',
   1111     'ka':                                   'ka_GE.GEORGIAN-ACADEMY',
   1112     'ka_ge':                                'ka_GE.GEORGIAN-ACADEMY',
   1113     'ka_ge.georgianacademy':                'ka_GE.GEORGIAN-ACADEMY',
   1114     'ka_ge.georgianps':                     'ka_GE.GEORGIAN-PS',
   1115     'ka_ge.georgianrs':                     'ka_GE.GEORGIAN-ACADEMY',
   1116     'kk_kz':                                'kk_KZ.RK1048',
   1117     'kl':                                   'kl_GL.ISO8859-1',
   1118     'kl_gl':                                'kl_GL.ISO8859-1',
   1119     'km_kh':                                'km_KH.UTF-8',
   1120     'kn':                                   'kn_IN.UTF-8',
   1121     'kn_in':                                'kn_IN.UTF-8',
   1122     'ko':                                   'ko_KR.eucKR',
   1123     'ko_kr':                                'ko_KR.eucKR',
   1124     'ko_kr.euc':                            'ko_KR.eucKR',
   1125     'kok_in':                               'kok_IN.UTF-8',
   1126     'korean':                               'ko_KR.eucKR',
   1127     'korean.euc':                           'ko_KR.eucKR',
   1128     'ks':                                   'ks_IN.UTF-8',
   1129     'ks_in':                                'ks_IN.UTF-8',
   1130     'ks_in@devanagari.utf8':                'ks_IN.UTF-8@devanagari',
   1131     'ku_tr':                                'ku_TR.ISO8859-9',
   1132     'kw':                                   'kw_GB.ISO8859-1',
   1133     'kw_gb':                                'kw_GB.ISO8859-1',
   1134     'ky':                                   'ky_KG.UTF-8',
   1135     'ky_kg':                                'ky_KG.UTF-8',
   1136     'lb_lu':                                'lb_LU.UTF-8',
   1137     'lg_ug':                                'lg_UG.ISO8859-10',
   1138     'li_be':                                'li_BE.UTF-8',
   1139     'li_nl':                                'li_NL.UTF-8',
   1140     'lij_it':                               'lij_IT.UTF-8',
   1141     'lithuanian':                           'lt_LT.ISO8859-13',
   1142     'lo':                                   'lo_LA.MULELAO-1',
   1143     'lo_la':                                'lo_LA.MULELAO-1',
   1144     'lo_la.cp1133':                         'lo_LA.IBM-CP1133',
   1145     'lo_la.ibmcp1133':                      'lo_LA.IBM-CP1133',
   1146     'lo_la.mulelao1':                       'lo_LA.MULELAO-1',
   1147     'lt':                                   'lt_LT.ISO8859-13',
   1148     'lt_lt':                                'lt_LT.ISO8859-13',
   1149     'lv':                                   'lv_LV.ISO8859-13',
   1150     'lv_lv':                                'lv_LV.ISO8859-13',
   1151     'mag_in':                               'mag_IN.UTF-8',
   1152     'mai':                                  'mai_IN.UTF-8',
   1153     'mai_in':                               'mai_IN.UTF-8',
   1154     'mg_mg':                                'mg_MG.ISO8859-15',
   1155     'mhr_ru':                               'mhr_RU.UTF-8',
   1156     'mi':                                   'mi_NZ.ISO8859-1',
   1157     'mi_nz':                                'mi_NZ.ISO8859-1',
   1158     'mk':                                   'mk_MK.ISO8859-5',
   1159     'mk_mk':                                'mk_MK.ISO8859-5',
   1160     'ml':                                   'ml_IN.UTF-8',
   1161     'ml_in':                                'ml_IN.UTF-8',
   1162     'mn_mn':                                'mn_MN.UTF-8',
   1163     'mni_in':                               'mni_IN.UTF-8',
   1164     'mr':                                   'mr_IN.UTF-8',
   1165     'mr_in':                                'mr_IN.UTF-8',
   1166     'ms':                                   'ms_MY.ISO8859-1',
   1167     'ms_my':                                'ms_MY.ISO8859-1',
   1168     'mt':                                   'mt_MT.ISO8859-3',
   1169     'mt_mt':                                'mt_MT.ISO8859-3',
   1170     'my_mm':                                'my_MM.UTF-8',
   1171     'nan_tw@latin':                         'nan_TW.UTF-8@latin',
   1172     'nb':                                   'nb_NO.ISO8859-1',
   1173     'nb_no':                                'nb_NO.ISO8859-1',
   1174     'nds_de':                               'nds_DE.UTF-8',
   1175     'nds_nl':                               'nds_NL.UTF-8',
   1176     'ne_np':                                'ne_NP.UTF-8',
   1177     'nhn_mx':                               'nhn_MX.UTF-8',
   1178     'niu_nu':                               'niu_NU.UTF-8',
   1179     'niu_nz':                               'niu_NZ.UTF-8',
   1180     'nl':                                   'nl_NL.ISO8859-1',
   1181     'nl_aw':                                'nl_AW.UTF-8',
   1182     'nl_be':                                'nl_BE.ISO8859-1',
   1183     'nl_nl':                                'nl_NL.ISO8859-1',
   1184     'nn':                                   'nn_NO.ISO8859-1',
   1185     'nn_no':                                'nn_NO.ISO8859-1',
   1186     'no':                                   'no_NO.ISO8859-1',
   1187     'no@nynorsk':                           'ny_NO.ISO8859-1',
   1188     'no_no':                                'no_NO.ISO8859-1',
   1189     'no_no.iso88591@bokmal':                'no_NO.ISO8859-1',
   1190     'no_no.iso88591@nynorsk':               'no_NO.ISO8859-1',
   1191     'norwegian':                            'no_NO.ISO8859-1',
   1192     'nr':                                   'nr_ZA.ISO8859-1',
   1193     'nr_za':                                'nr_ZA.ISO8859-1',
   1194     'nso':                                  'nso_ZA.ISO8859-15',
   1195     'nso_za':                               'nso_ZA.ISO8859-15',
   1196     'ny':                                   'ny_NO.ISO8859-1',
   1197     'ny_no':                                'ny_NO.ISO8859-1',
   1198     'nynorsk':                              'nn_NO.ISO8859-1',
   1199     'oc':                                   'oc_FR.ISO8859-1',
   1200     'oc_fr':                                'oc_FR.ISO8859-1',
   1201     'om_et':                                'om_ET.UTF-8',
   1202     'om_ke':                                'om_KE.ISO8859-1',
   1203     'or':                                   'or_IN.UTF-8',
   1204     'or_in':                                'or_IN.UTF-8',
   1205     'os_ru':                                'os_RU.UTF-8',
   1206     'pa':                                   'pa_IN.UTF-8',
   1207     'pa_in':                                'pa_IN.UTF-8',
   1208     'pa_pk':                                'pa_PK.UTF-8',
   1209     'pap_an':                               'pap_AN.UTF-8',
   1210     'pd':                                   'pd_US.ISO8859-1',
   1211     'pd_de':                                'pd_DE.ISO8859-1',
   1212     'pd_us':                                'pd_US.ISO8859-1',
   1213     'ph':                                   'ph_PH.ISO8859-1',
   1214     'ph_ph':                                'ph_PH.ISO8859-1',
   1215     'pl':                                   'pl_PL.ISO8859-2',
   1216     'pl_pl':                                'pl_PL.ISO8859-2',
   1217     'polish':                               'pl_PL.ISO8859-2',
   1218     'portuguese':                           'pt_PT.ISO8859-1',
   1219     'portuguese_brazil':                    'pt_BR.ISO8859-1',
   1220     'posix':                                'C',
   1221     'posix-utf2':                           'C',
   1222     'pp':                                   'pp_AN.ISO8859-1',
   1223     'pp_an':                                'pp_AN.ISO8859-1',
   1224     'ps_af':                                'ps_AF.UTF-8',
   1225     'pt':                                   'pt_PT.ISO8859-1',
   1226     'pt_br':                                'pt_BR.ISO8859-1',
   1227     'pt_pt':                                'pt_PT.ISO8859-1',
   1228     'ro':                                   'ro_RO.ISO8859-2',
   1229     'ro_ro':                                'ro_RO.ISO8859-2',
   1230     'romanian':                             'ro_RO.ISO8859-2',
   1231     'ru':                                   'ru_RU.UTF-8',
   1232     'ru_ru':                                'ru_RU.UTF-8',
   1233     'ru_ua':                                'ru_UA.KOI8-U',
   1234     'rumanian':                             'ro_RO.ISO8859-2',
   1235     'russian':                              'ru_RU.ISO8859-5',
   1236     'rw':                                   'rw_RW.ISO8859-1',
   1237     'rw_rw':                                'rw_RW.ISO8859-1',
   1238     'sa_in':                                'sa_IN.UTF-8',
   1239     'sat_in':                               'sat_IN.UTF-8',
   1240     'sc_it':                                'sc_IT.UTF-8',
   1241     'sd':                                   'sd_IN.UTF-8',
   1242     'sd_in':                                'sd_IN.UTF-8',
   1243     'sd_in (at] devanagari.utf8':                'sd_IN.UTF-8@devanagari',
   1244     'sd_pk':                                'sd_PK.UTF-8',
   1245     'se_no':                                'se_NO.UTF-8',
   1246     'serbocroatian':                        'sr_RS.UTF-8@latin',
   1247     'sh':                                   'sr_RS.UTF-8@latin',
   1248     'sh_ba.iso88592@bosnia':                'sr_CS.ISO8859-2',
   1249     'sh_hr':                                'sh_HR.ISO8859-2',
   1250     'sh_hr.iso88592':                       'hr_HR.ISO8859-2',
   1251     'sh_sp':                                'sr_CS.ISO8859-2',
   1252     'sh_yu':                                'sr_RS.UTF-8@latin',
   1253     'shs_ca':                               'shs_CA.UTF-8',
   1254     'si':                                   'si_LK.UTF-8',
   1255     'si_lk':                                'si_LK.UTF-8',
   1256     'sid_et':                               'sid_ET.UTF-8',
   1257     'sinhala':                              'si_LK.UTF-8',
   1258     'sk':                                   'sk_SK.ISO8859-2',
   1259     'sk_sk':                                'sk_SK.ISO8859-2',
   1260     'sl':                                   'sl_SI.ISO8859-2',
   1261     'sl_cs':                                'sl_CS.ISO8859-2',
   1262     'sl_si':                                'sl_SI.ISO8859-2',
   1263     'slovak':                               'sk_SK.ISO8859-2',
   1264     'slovene':                              'sl_SI.ISO8859-2',
   1265     'slovenian':                            'sl_SI.ISO8859-2',
   1266     'so_dj':                                'so_DJ.ISO8859-1',
   1267     'so_et':                                'so_ET.UTF-8',
   1268     'so_ke':                                'so_KE.ISO8859-1',
   1269     'so_so':                                'so_SO.ISO8859-1',
   1270     'sp':                                   'sr_CS.ISO8859-5',
   1271     'sp_yu':                                'sr_CS.ISO8859-5',
   1272     'spanish':                              'es_ES.ISO8859-1',
   1273     'spanish_spain':                        'es_ES.ISO8859-1',
   1274     'sq':                                   'sq_AL.ISO8859-2',
   1275     'sq_al':                                'sq_AL.ISO8859-2',
   1276     'sq_mk':                                'sq_MK.UTF-8',
   1277     'sr':                                   'sr_RS.UTF-8',
   1278     'sr@cyrillic':                          'sr_RS.UTF-8',
   1279     'sr@latn':                              'sr_CS.UTF-8@latin',
   1280     'sr_cs':                                'sr_CS.UTF-8',
   1281     'sr_cs.iso88592@latn':                  'sr_CS.ISO8859-2',
   1282     'sr_cs@latn':                           'sr_CS.UTF-8@latin',
   1283     'sr_me':                                'sr_ME.UTF-8',
   1284     'sr_rs':                                'sr_RS.UTF-8',
   1285     'sr_rs@latn':                           'sr_RS.UTF-8@latin',
   1286     'sr_sp':                                'sr_CS.ISO8859-2',
   1287     'sr_yu':                                'sr_RS.UTF-8@latin',
   1288     'sr_yu.cp1251@cyrillic':                'sr_CS.CP1251',
   1289     'sr_yu.iso88592':                       'sr_CS.ISO8859-2',
   1290     'sr_yu.iso88595':                       'sr_CS.ISO8859-5',
   1291     'sr_yu.iso88595@cyrillic':              'sr_CS.ISO8859-5',
   1292     'sr_yu.microsoftcp1251@cyrillic':       'sr_CS.CP1251',
   1293     'sr_yu.utf8':                           'sr_RS.UTF-8',
   1294     'sr_yu.utf8@cyrillic':                  'sr_RS.UTF-8',
   1295     'sr_yu@cyrillic':                       'sr_RS.UTF-8',
   1296     'ss':                                   'ss_ZA.ISO8859-1',
   1297     'ss_za':                                'ss_ZA.ISO8859-1',
   1298     'st':                                   'st_ZA.ISO8859-1',
   1299     'st_za':                                'st_ZA.ISO8859-1',
   1300     'sv':                                   'sv_SE.ISO8859-1',
   1301     'sv_fi':                                'sv_FI.ISO8859-1',
   1302     'sv_se':                                'sv_SE.ISO8859-1',
   1303     'sw_ke':                                'sw_KE.UTF-8',
   1304     'sw_tz':                                'sw_TZ.UTF-8',
   1305     'swedish':                              'sv_SE.ISO8859-1',
   1306     'szl_pl':                               'szl_PL.UTF-8',
   1307     'ta':                                   'ta_IN.TSCII-0',
   1308     'ta_in':                                'ta_IN.TSCII-0',
   1309     'ta_in.tscii':                          'ta_IN.TSCII-0',
   1310     'ta_in.tscii0':                         'ta_IN.TSCII-0',
   1311     'ta_lk':                                'ta_LK.UTF-8',
   1312     'te':                                   'te_IN.UTF-8',
   1313     'te_in':                                'te_IN.UTF-8',
   1314     'tg':                                   'tg_TJ.KOI8-C',
   1315     'tg_tj':                                'tg_TJ.KOI8-C',
   1316     'th':                                   'th_TH.ISO8859-11',
   1317     'th_th':                                'th_TH.ISO8859-11',
   1318     'th_th.tactis':                         'th_TH.TIS620',
   1319     'th_th.tis620':                         'th_TH.TIS620',
   1320     'thai':                                 'th_TH.ISO8859-11',
   1321     'ti_er':                                'ti_ER.UTF-8',
   1322     'ti_et':                                'ti_ET.UTF-8',
   1323     'tig_er':                               'tig_ER.UTF-8',
   1324     'tk_tm':                                'tk_TM.UTF-8',
   1325     'tl':                                   'tl_PH.ISO8859-1',
   1326     'tl_ph':                                'tl_PH.ISO8859-1',
   1327     'tn':                                   'tn_ZA.ISO8859-15',
   1328     'tn_za':                                'tn_ZA.ISO8859-15',
   1329     'tr':                                   'tr_TR.ISO8859-9',
   1330     'tr_cy':                                'tr_CY.ISO8859-9',
   1331     'tr_tr':                                'tr_TR.ISO8859-9',
   1332     'ts':                                   'ts_ZA.ISO8859-1',
   1333     'ts_za':                                'ts_ZA.ISO8859-1',
   1334     'tt':                                   'tt_RU.TATAR-CYR',
   1335     'tt_ru':                                'tt_RU.TATAR-CYR',
   1336     'tt_ru.tatarcyr':                       'tt_RU.TATAR-CYR',
   1337     'tt_ru@iqtelif':                        'tt_RU.UTF-8@iqtelif',
   1338     'turkish':                              'tr_TR.ISO8859-9',
   1339     'ug_cn':                                'ug_CN.UTF-8',
   1340     'uk':                                   'uk_UA.KOI8-U',
   1341     'uk_ua':                                'uk_UA.KOI8-U',
   1342     'univ':                                 'en_US.utf',
   1343     'universal':                            'en_US.utf',
   1344     'universal.utf8@ucs4':                  'en_US.UTF-8',
   1345     'unm_us':                               'unm_US.UTF-8',
   1346     'ur':                                   'ur_PK.CP1256',
   1347     'ur_in':                                'ur_IN.UTF-8',
   1348     'ur_pk':                                'ur_PK.CP1256',
   1349     'uz':                                   'uz_UZ.UTF-8',
   1350     'uz_uz':                                'uz_UZ.UTF-8',
   1351     'uz_uz@cyrillic':                       'uz_UZ.UTF-8',
   1352     've':                                   've_ZA.UTF-8',
   1353     've_za':                                've_ZA.UTF-8',
   1354     'vi':                                   'vi_VN.TCVN',
   1355     'vi_vn':                                'vi_VN.TCVN',
   1356     'vi_vn.tcvn':                           'vi_VN.TCVN',
   1357     'vi_vn.tcvn5712':                       'vi_VN.TCVN',
   1358     'vi_vn.viscii':                         'vi_VN.VISCII',
   1359     'vi_vn.viscii111':                      'vi_VN.VISCII',
   1360     'wa':                                   'wa_BE.ISO8859-1',
   1361     'wa_be':                                'wa_BE.ISO8859-1',
   1362     'wae_ch':                               'wae_CH.UTF-8',
   1363     'wal_et':                               'wal_ET.UTF-8',
   1364     'wo_sn':                                'wo_SN.UTF-8',
   1365     'xh':                                   'xh_ZA.ISO8859-1',
   1366     'xh_za':                                'xh_ZA.ISO8859-1',
   1367     'yi':                                   'yi_US.CP1255',
   1368     'yi_us':                                'yi_US.CP1255',
   1369     'yo_ng':                                'yo_NG.UTF-8',
   1370     'yue_hk':                               'yue_HK.UTF-8',
   1371     'zh':                                   'zh_CN.eucCN',
   1372     'zh_cn':                                'zh_CN.gb2312',
   1373     'zh_cn.big5':                           'zh_TW.big5',
   1374     'zh_cn.euc':                            'zh_CN.eucCN',
   1375     'zh_hk':                                'zh_HK.big5hkscs',
   1376     'zh_hk.big5hk':                         'zh_HK.big5hkscs',
   1377     'zh_sg':                                'zh_SG.GB2312',
   1378     'zh_sg.gbk':                            'zh_SG.GBK',
   1379     'zh_tw':                                'zh_TW.big5',
   1380     'zh_tw.euc':                            'zh_TW.eucTW',
   1381     'zh_tw.euctw':                          'zh_TW.eucTW',
   1382     'zu':                                   'zu_ZA.ISO8859-1',
   1383     'zu_za':                                'zu_ZA.ISO8859-1',
   1384 }
   1385 
   1386 #
   1387 # This maps Windows language identifiers to locale strings.
   1388 #
   1389 # This list has been updated from
   1390 # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
   1391 # to include every locale up to Windows Vista.
   1392 #
   1393 # NOTE: this mapping is incomplete.  If your language is missing, please
   1394 # submit a bug report to the Python bug tracker at http://bugs.python.org/
   1395 # Make sure you include the missing language identifier and the suggested
   1396 # locale code.
   1397 #
   1398 
   1399 windows_locale = {
   1400     0x0436: "af_ZA", # Afrikaans
   1401     0x041c: "sq_AL", # Albanian
   1402     0x0484: "gsw_FR",# Alsatian - France
   1403     0x045e: "am_ET", # Amharic - Ethiopia
   1404     0x0401: "ar_SA", # Arabic - Saudi Arabia
   1405     0x0801: "ar_IQ", # Arabic - Iraq
   1406     0x0c01: "ar_EG", # Arabic - Egypt
   1407     0x1001: "ar_LY", # Arabic - Libya
   1408     0x1401: "ar_DZ", # Arabic - Algeria
   1409     0x1801: "ar_MA", # Arabic - Morocco
   1410     0x1c01: "ar_TN", # Arabic - Tunisia
   1411     0x2001: "ar_OM", # Arabic - Oman
   1412     0x2401: "ar_YE", # Arabic - Yemen
   1413     0x2801: "ar_SY", # Arabic - Syria
   1414     0x2c01: "ar_JO", # Arabic - Jordan
   1415     0x3001: "ar_LB", # Arabic - Lebanon
   1416     0x3401: "ar_KW", # Arabic - Kuwait
   1417     0x3801: "ar_AE", # Arabic - United Arab Emirates
   1418     0x3c01: "ar_BH", # Arabic - Bahrain
   1419     0x4001: "ar_QA", # Arabic - Qatar
   1420     0x042b: "hy_AM", # Armenian
   1421     0x044d: "as_IN", # Assamese - India
   1422     0x042c: "az_AZ", # Azeri - Latin
   1423     0x082c: "az_AZ", # Azeri - Cyrillic
   1424     0x046d: "ba_RU", # Bashkir
   1425     0x042d: "eu_ES", # Basque - Russia
   1426     0x0423: "be_BY", # Belarusian
   1427     0x0445: "bn_IN", # Begali
   1428     0x201a: "bs_BA", # Bosnian - Cyrillic
   1429     0x141a: "bs_BA", # Bosnian - Latin
   1430     0x047e: "br_FR", # Breton - France
   1431     0x0402: "bg_BG", # Bulgarian
   1432 #    0x0455: "my_MM", # Burmese - Not supported
   1433     0x0403: "ca_ES", # Catalan
   1434     0x0004: "zh_CHS",# Chinese - Simplified
   1435     0x0404: "zh_TW", # Chinese - Taiwan
   1436     0x0804: "zh_CN", # Chinese - PRC
   1437     0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
   1438     0x1004: "zh_SG", # Chinese - Singapore
   1439     0x1404: "zh_MO", # Chinese - Macao S.A.R.
   1440     0x7c04: "zh_CHT",# Chinese - Traditional
   1441     0x0483: "co_FR", # Corsican - France
   1442     0x041a: "hr_HR", # Croatian
   1443     0x101a: "hr_BA", # Croatian - Bosnia
   1444     0x0405: "cs_CZ", # Czech
   1445     0x0406: "da_DK", # Danish
   1446     0x048c: "gbz_AF",# Dari - Afghanistan
   1447     0x0465: "div_MV",# Divehi - Maldives
   1448     0x0413: "nl_NL", # Dutch - The Netherlands
   1449     0x0813: "nl_BE", # Dutch - Belgium
   1450     0x0409: "en_US", # English - United States
   1451     0x0809: "en_GB", # English - United Kingdom
   1452     0x0c09: "en_AU", # English - Australia
   1453     0x1009: "en_CA", # English - Canada
   1454     0x1409: "en_NZ", # English - New Zealand
   1455     0x1809: "en_IE", # English - Ireland
   1456     0x1c09: "en_ZA", # English - South Africa
   1457     0x2009: "en_JA", # English - Jamaica
   1458     0x2409: "en_CB", # English - Caribbean
   1459     0x2809: "en_BZ", # English - Belize
   1460     0x2c09: "en_TT", # English - Trinidad
   1461     0x3009: "en_ZW", # English - Zimbabwe
   1462     0x3409: "en_PH", # English - Philippines
   1463     0x4009: "en_IN", # English - India
   1464     0x4409: "en_MY", # English - Malaysia
   1465     0x4809: "en_IN", # English - Singapore
   1466     0x0425: "et_EE", # Estonian
   1467     0x0438: "fo_FO", # Faroese
   1468     0x0464: "fil_PH",# Filipino
   1469     0x040b: "fi_FI", # Finnish
   1470     0x040c: "fr_FR", # French - France
   1471     0x080c: "fr_BE", # French - Belgium
   1472     0x0c0c: "fr_CA", # French - Canada
   1473     0x100c: "fr_CH", # French - Switzerland
   1474     0x140c: "fr_LU", # French - Luxembourg
   1475     0x180c: "fr_MC", # French - Monaco
   1476     0x0462: "fy_NL", # Frisian - Netherlands
   1477     0x0456: "gl_ES", # Galician
   1478     0x0437: "ka_GE", # Georgian
   1479     0x0407: "de_DE", # German - Germany
   1480     0x0807: "de_CH", # German - Switzerland
   1481     0x0c07: "de_AT", # German - Austria
   1482     0x1007: "de_LU", # German - Luxembourg
   1483     0x1407: "de_LI", # German - Liechtenstein
   1484     0x0408: "el_GR", # Greek
   1485     0x046f: "kl_GL", # Greenlandic - Greenland
   1486     0x0447: "gu_IN", # Gujarati
   1487     0x0468: "ha_NG", # Hausa - Latin
   1488     0x040d: "he_IL", # Hebrew
   1489     0x0439: "hi_IN", # Hindi
   1490     0x040e: "hu_HU", # Hungarian
   1491     0x040f: "is_IS", # Icelandic
   1492     0x0421: "id_ID", # Indonesian
   1493     0x045d: "iu_CA", # Inuktitut - Syllabics
   1494     0x085d: "iu_CA", # Inuktitut - Latin
   1495     0x083c: "ga_IE", # Irish - Ireland
   1496     0x0410: "it_IT", # Italian - Italy
   1497     0x0810: "it_CH", # Italian - Switzerland
   1498     0x0411: "ja_JP", # Japanese
   1499     0x044b: "kn_IN", # Kannada - India
   1500     0x043f: "kk_KZ", # Kazakh
   1501     0x0453: "kh_KH", # Khmer - Cambodia
   1502     0x0486: "qut_GT",# K'iche - Guatemala
   1503     0x0487: "rw_RW", # Kinyarwanda - Rwanda
   1504     0x0457: "kok_IN",# Konkani
   1505     0x0412: "ko_KR", # Korean
   1506     0x0440: "ky_KG", # Kyrgyz
   1507     0x0454: "lo_LA", # Lao - Lao PDR
   1508     0x0426: "lv_LV", # Latvian
   1509     0x0427: "lt_LT", # Lithuanian
   1510     0x082e: "dsb_DE",# Lower Sorbian - Germany
   1511     0x046e: "lb_LU", # Luxembourgish
   1512     0x042f: "mk_MK", # FYROM Macedonian
   1513     0x043e: "ms_MY", # Malay - Malaysia
   1514     0x083e: "ms_BN", # Malay - Brunei Darussalam
   1515     0x044c: "ml_IN", # Malayalam - India
   1516     0x043a: "mt_MT", # Maltese
   1517     0x0481: "mi_NZ", # Maori
   1518     0x047a: "arn_CL",# Mapudungun
   1519     0x044e: "mr_IN", # Marathi
   1520     0x047c: "moh_CA",# Mohawk - Canada
   1521     0x0450: "mn_MN", # Mongolian - Cyrillic
   1522     0x0850: "mn_CN", # Mongolian - PRC
   1523     0x0461: "ne_NP", # Nepali
   1524     0x0414: "nb_NO", # Norwegian - Bokmal
   1525     0x0814: "nn_NO", # Norwegian - Nynorsk
   1526     0x0482: "oc_FR", # Occitan - France
   1527     0x0448: "or_IN", # Oriya - India
   1528     0x0463: "ps_AF", # Pashto - Afghanistan
   1529     0x0429: "fa_IR", # Persian
   1530     0x0415: "pl_PL", # Polish
   1531     0x0416: "pt_BR", # Portuguese - Brazil
   1532     0x0816: "pt_PT", # Portuguese - Portugal
   1533     0x0446: "pa_IN", # Punjabi
   1534     0x046b: "quz_BO",# Quechua (Bolivia)
   1535     0x086b: "quz_EC",# Quechua (Ecuador)
   1536     0x0c6b: "quz_PE",# Quechua (Peru)
   1537     0x0418: "ro_RO", # Romanian - Romania
   1538     0x0417: "rm_CH", # Romansh
   1539     0x0419: "ru_RU", # Russian
   1540     0x243b: "smn_FI",# Sami Finland
   1541     0x103b: "smj_NO",# Sami Norway
   1542     0x143b: "smj_SE",# Sami Sweden
   1543     0x043b: "se_NO", # Sami Northern Norway
   1544     0x083b: "se_SE", # Sami Northern Sweden
   1545     0x0c3b: "se_FI", # Sami Northern Finland
   1546     0x203b: "sms_FI",# Sami Skolt
   1547     0x183b: "sma_NO",# Sami Southern Norway
   1548     0x1c3b: "sma_SE",# Sami Southern Sweden
   1549     0x044f: "sa_IN", # Sanskrit
   1550     0x0c1a: "sr_SP", # Serbian - Cyrillic
   1551     0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
   1552     0x081a: "sr_SP", # Serbian - Latin
   1553     0x181a: "sr_BA", # Serbian - Bosnia Latin
   1554     0x045b: "si_LK", # Sinhala - Sri Lanka
   1555     0x046c: "ns_ZA", # Northern Sotho
   1556     0x0432: "tn_ZA", # Setswana - Southern Africa
   1557     0x041b: "sk_SK", # Slovak
   1558     0x0424: "sl_SI", # Slovenian
   1559     0x040a: "es_ES", # Spanish - Spain
   1560     0x080a: "es_MX", # Spanish - Mexico
   1561     0x0c0a: "es_ES", # Spanish - Spain (Modern)
   1562     0x100a: "es_GT", # Spanish - Guatemala
   1563     0x140a: "es_CR", # Spanish - Costa Rica
   1564     0x180a: "es_PA", # Spanish - Panama
   1565     0x1c0a: "es_DO", # Spanish - Dominican Republic
   1566     0x200a: "es_VE", # Spanish - Venezuela
   1567     0x240a: "es_CO", # Spanish - Colombia
   1568     0x280a: "es_PE", # Spanish - Peru
   1569     0x2c0a: "es_AR", # Spanish - Argentina
   1570     0x300a: "es_EC", # Spanish - Ecuador
   1571     0x340a: "es_CL", # Spanish - Chile
   1572     0x380a: "es_UR", # Spanish - Uruguay
   1573     0x3c0a: "es_PY", # Spanish - Paraguay
   1574     0x400a: "es_BO", # Spanish - Bolivia
   1575     0x440a: "es_SV", # Spanish - El Salvador
   1576     0x480a: "es_HN", # Spanish - Honduras
   1577     0x4c0a: "es_NI", # Spanish - Nicaragua
   1578     0x500a: "es_PR", # Spanish - Puerto Rico
   1579     0x540a: "es_US", # Spanish - United States
   1580 #    0x0430: "", # Sutu - Not supported
   1581     0x0441: "sw_KE", # Swahili
   1582     0x041d: "sv_SE", # Swedish - Sweden
   1583     0x081d: "sv_FI", # Swedish - Finland
   1584     0x045a: "syr_SY",# Syriac
   1585     0x0428: "tg_TJ", # Tajik - Cyrillic
   1586     0x085f: "tmz_DZ",# Tamazight - Latin
   1587     0x0449: "ta_IN", # Tamil
   1588     0x0444: "tt_RU", # Tatar
   1589     0x044a: "te_IN", # Telugu
   1590     0x041e: "th_TH", # Thai
   1591     0x0851: "bo_BT", # Tibetan - Bhutan
   1592     0x0451: "bo_CN", # Tibetan - PRC
   1593     0x041f: "tr_TR", # Turkish
   1594     0x0442: "tk_TM", # Turkmen - Cyrillic
   1595     0x0480: "ug_CN", # Uighur - Arabic
   1596     0x0422: "uk_UA", # Ukrainian
   1597     0x042e: "wen_DE",# Upper Sorbian - Germany
   1598     0x0420: "ur_PK", # Urdu
   1599     0x0820: "ur_IN", # Urdu - India
   1600     0x0443: "uz_UZ", # Uzbek - Latin
   1601     0x0843: "uz_UZ", # Uzbek - Cyrillic
   1602     0x042a: "vi_VN", # Vietnamese
   1603     0x0452: "cy_GB", # Welsh
   1604     0x0488: "wo_SN", # Wolof - Senegal
   1605     0x0434: "xh_ZA", # Xhosa - South Africa
   1606     0x0485: "sah_RU",# Yakut - Cyrillic
   1607     0x0478: "ii_CN", # Yi - PRC
   1608     0x046a: "yo_NG", # Yoruba - Nigeria
   1609     0x0435: "zu_ZA", # Zulu
   1610 }
   1611 
   1612 def _print_locale():
   1613 
   1614     """ Test function.
   1615     """
   1616     categories = {}
   1617     def _init_categories(categories=categories):
   1618         for k,v in globals().items():
   1619             if k[:3] == 'LC_':
   1620                 categories[k] = v
   1621     _init_categories()
   1622     del categories['LC_ALL']
   1623 
   1624     print('Locale defaults as determined by getdefaultlocale():')
   1625     print('-'*72)
   1626     lang, enc = getdefaultlocale()
   1627     print('Language: ', lang or '(undefined)')
   1628     print('Encoding: ', enc or '(undefined)')
   1629     print()
   1630 
   1631     print('Locale settings on startup:')
   1632     print('-'*72)
   1633     for name,category in categories.items():
   1634         print(name, '...')
   1635         lang, enc = getlocale(category)
   1636         print('   Language: ', lang or '(undefined)')
   1637         print('   Encoding: ', enc or '(undefined)')
   1638         print()
   1639 
   1640     print()
   1641     print('Locale settings after calling resetlocale():')
   1642     print('-'*72)
   1643     resetlocale()
   1644     for name,category in categories.items():
   1645         print(name, '...')
   1646         lang, enc = getlocale(category)
   1647         print('   Language: ', lang or '(undefined)')
   1648         print('   Encoding: ', enc or '(undefined)')
   1649         print()
   1650 
   1651     try:
   1652         setlocale(LC_ALL, "")
   1653     except:
   1654         print('NOTE:')
   1655         print('setlocale(LC_ALL, "") does not support the default locale')
   1656         print('given in the OS environment variables.')
   1657     else:
   1658         print()
   1659         print('Locale settings after calling setlocale(LC_ALL, ""):')
   1660         print('-'*72)
   1661         for name,category in categories.items():
   1662             print(name, '...')
   1663             lang, enc = getlocale(category)
   1664             print('   Language: ', lang or '(undefined)')
   1665             print('   Encoding: ', enc or '(undefined)')
   1666             print()
   1667 
   1668 ###
   1669 
   1670 try:
   1671     LC_MESSAGES
   1672 except NameError:
   1673     pass
   1674 else:
   1675     __all__.append("LC_MESSAGES")
   1676 
   1677 if __name__=='__main__':
   1678     print('Locale aliasing:')
   1679     print()
   1680     _print_locale()
   1681     print()
   1682     print('Number formatting:')
   1683     print()
   1684     _test()
   1685