Home | History | Annotate | Download | only in encodings
      1 """ Encoding Aliases Support
      2 
      3     This module is used by the encodings package search function to
      4     map encoding names to module names.
      5 
      6     Note that the search function normalizes the encoding names before
      7     doing the lookup, so the mapping will have to map normalized
      8     encoding names to module names.
      9 
     10     Contents:
     11 
     12         The following aliases dictionary contains mappings of all IANA
     13         character set names for which the Python core library provides
     14         codecs. In addition to these, a few Python specific codec
     15         aliases have also been added.
     16 
     17 """
     18 aliases = {
     19 
     20     # Please keep this list sorted alphabetically by value !
     21 
     22     # ascii codec
     23     '646'                : 'ascii',
     24     'ansi_x3.4_1968'     : 'ascii',
     25     'ansi_x3_4_1968'     : 'ascii', # some email headers use this non-standard name
     26     'ansi_x3.4_1986'     : 'ascii',
     27     'cp367'              : 'ascii',
     28     'csascii'            : 'ascii',
     29     'ibm367'             : 'ascii',
     30     'iso646_us'          : 'ascii',
     31     'iso_646.irv_1991'   : 'ascii',
     32     'iso_ir_6'           : 'ascii',
     33     'us'                 : 'ascii',
     34     'us_ascii'           : 'ascii',
     35 
     36     # base64_codec codec
     37     'base64'             : 'base64_codec',
     38     'base_64'            : 'base64_codec',
     39 
     40     # big5 codec
     41     'big5_tw'            : 'big5',
     42     'csbig5'             : 'big5',
     43 
     44     # big5hkscs codec
     45     'big5_hkscs'         : 'big5hkscs',
     46     'hkscs'              : 'big5hkscs',
     47 
     48     # bz2_codec codec
     49     'bz2'                : 'bz2_codec',
     50 
     51     # cp037 codec
     52     '037'                : 'cp037',
     53     'csibm037'           : 'cp037',
     54     'ebcdic_cp_ca'       : 'cp037',
     55     'ebcdic_cp_nl'       : 'cp037',
     56     'ebcdic_cp_us'       : 'cp037',
     57     'ebcdic_cp_wt'       : 'cp037',
     58     'ibm037'             : 'cp037',
     59     'ibm039'             : 'cp037',
     60 
     61     # cp1026 codec
     62     '1026'               : 'cp1026',
     63     'csibm1026'          : 'cp1026',
     64     'ibm1026'            : 'cp1026',
     65 
     66     # cp1125 codec
     67     '1125'                : 'cp1125',
     68     'ibm1125'             : 'cp1125',
     69     'cp866u'              : 'cp1125',
     70     'ruscii'              : 'cp1125',
     71 
     72     # cp1140 codec
     73     '1140'               : 'cp1140',
     74     'ibm1140'            : 'cp1140',
     75 
     76     # cp1250 codec
     77     '1250'               : 'cp1250',
     78     'windows_1250'       : 'cp1250',
     79 
     80     # cp1251 codec
     81     '1251'               : 'cp1251',
     82     'windows_1251'       : 'cp1251',
     83 
     84     # cp1252 codec
     85     '1252'               : 'cp1252',
     86     'windows_1252'       : 'cp1252',
     87 
     88     # cp1253 codec
     89     '1253'               : 'cp1253',
     90     'windows_1253'       : 'cp1253',
     91 
     92     # cp1254 codec
     93     '1254'               : 'cp1254',
     94     'windows_1254'       : 'cp1254',
     95 
     96     # cp1255 codec
     97     '1255'               : 'cp1255',
     98     'windows_1255'       : 'cp1255',
     99 
    100     # cp1256 codec
    101     '1256'               : 'cp1256',
    102     'windows_1256'       : 'cp1256',
    103 
    104     # cp1257 codec
    105     '1257'               : 'cp1257',
    106     'windows_1257'       : 'cp1257',
    107 
    108     # cp1258 codec
    109     '1258'               : 'cp1258',
    110     'windows_1258'       : 'cp1258',
    111 
    112     # cp273 codec
    113     '273'                : 'cp273',
    114     'ibm273'             : 'cp273',
    115     'csibm273'           : 'cp273',
    116 
    117     # cp424 codec
    118     '424'                : 'cp424',
    119     'csibm424'           : 'cp424',
    120     'ebcdic_cp_he'       : 'cp424',
    121     'ibm424'             : 'cp424',
    122 
    123     # cp437 codec
    124     '437'                : 'cp437',
    125     'cspc8codepage437'   : 'cp437',
    126     'ibm437'             : 'cp437',
    127 
    128     # cp500 codec
    129     '500'                : 'cp500',
    130     'csibm500'           : 'cp500',
    131     'ebcdic_cp_be'       : 'cp500',
    132     'ebcdic_cp_ch'       : 'cp500',
    133     'ibm500'             : 'cp500',
    134 
    135     # cp775 codec
    136     '775'                : 'cp775',
    137     'cspc775baltic'      : 'cp775',
    138     'ibm775'             : 'cp775',
    139 
    140     # cp850 codec
    141     '850'                : 'cp850',
    142     'cspc850multilingual' : 'cp850',
    143     'ibm850'             : 'cp850',
    144 
    145     # cp852 codec
    146     '852'                : 'cp852',
    147     'cspcp852'           : 'cp852',
    148     'ibm852'             : 'cp852',
    149 
    150     # cp855 codec
    151     '855'                : 'cp855',
    152     'csibm855'           : 'cp855',
    153     'ibm855'             : 'cp855',
    154 
    155     # cp857 codec
    156     '857'                : 'cp857',
    157     'csibm857'           : 'cp857',
    158     'ibm857'             : 'cp857',
    159 
    160     # cp858 codec
    161     '858'                : 'cp858',
    162     'csibm858'           : 'cp858',
    163     'ibm858'             : 'cp858',
    164 
    165     # cp860 codec
    166     '860'                : 'cp860',
    167     'csibm860'           : 'cp860',
    168     'ibm860'             : 'cp860',
    169 
    170     # cp861 codec
    171     '861'                : 'cp861',
    172     'cp_is'              : 'cp861',
    173     'csibm861'           : 'cp861',
    174     'ibm861'             : 'cp861',
    175 
    176     # cp862 codec
    177     '862'                : 'cp862',
    178     'cspc862latinhebrew' : 'cp862',
    179     'ibm862'             : 'cp862',
    180 
    181     # cp863 codec
    182     '863'                : 'cp863',
    183     'csibm863'           : 'cp863',
    184     'ibm863'             : 'cp863',
    185 
    186     # cp864 codec
    187     '864'                : 'cp864',
    188     'csibm864'           : 'cp864',
    189     'ibm864'             : 'cp864',
    190 
    191     # cp865 codec
    192     '865'                : 'cp865',
    193     'csibm865'           : 'cp865',
    194     'ibm865'             : 'cp865',
    195 
    196     # cp866 codec
    197     '866'                : 'cp866',
    198     'csibm866'           : 'cp866',
    199     'ibm866'             : 'cp866',
    200 
    201     # cp869 codec
    202     '869'                : 'cp869',
    203     'cp_gr'              : 'cp869',
    204     'csibm869'           : 'cp869',
    205     'ibm869'             : 'cp869',
    206 
    207     # cp932 codec
    208     '932'                : 'cp932',
    209     'ms932'              : 'cp932',
    210     'mskanji'            : 'cp932',
    211     'ms_kanji'           : 'cp932',
    212 
    213     # cp949 codec
    214     '949'                : 'cp949',
    215     'ms949'              : 'cp949',
    216     'uhc'                : 'cp949',
    217 
    218     # cp950 codec
    219     '950'                : 'cp950',
    220     'ms950'              : 'cp950',
    221 
    222     # euc_jis_2004 codec
    223     'jisx0213'           : 'euc_jis_2004',
    224     'eucjis2004'         : 'euc_jis_2004',
    225     'euc_jis2004'        : 'euc_jis_2004',
    226 
    227     # euc_jisx0213 codec
    228     'eucjisx0213'        : 'euc_jisx0213',
    229 
    230     # euc_jp codec
    231     'eucjp'              : 'euc_jp',
    232     'ujis'               : 'euc_jp',
    233     'u_jis'              : 'euc_jp',
    234 
    235     # euc_kr codec
    236     'euckr'              : 'euc_kr',
    237     'korean'             : 'euc_kr',
    238     'ksc5601'            : 'euc_kr',
    239     'ks_c_5601'          : 'euc_kr',
    240     'ks_c_5601_1987'     : 'euc_kr',
    241     'ksx1001'            : 'euc_kr',
    242     'ks_x_1001'          : 'euc_kr',
    243 
    244     # gb18030 codec
    245     'gb18030_2000'       : 'gb18030',
    246 
    247     # gb2312 codec
    248     'chinese'            : 'gb2312',
    249     'csiso58gb231280'    : 'gb2312',
    250     'euc_cn'             : 'gb2312',
    251     'euccn'              : 'gb2312',
    252     'eucgb2312_cn'       : 'gb2312',
    253     'gb2312_1980'        : 'gb2312',
    254     'gb2312_80'          : 'gb2312',
    255     'iso_ir_58'          : 'gb2312',
    256 
    257     # gbk codec
    258     '936'                : 'gbk',
    259     'cp936'              : 'gbk',
    260     'ms936'              : 'gbk',
    261 
    262     # hex_codec codec
    263     'hex'                : 'hex_codec',
    264 
    265     # hp_roman8 codec
    266     'roman8'             : 'hp_roman8',
    267     'r8'                 : 'hp_roman8',
    268     'csHPRoman8'         : 'hp_roman8',
    269 
    270     # hz codec
    271     'hzgb'               : 'hz',
    272     'hz_gb'              : 'hz',
    273     'hz_gb_2312'         : 'hz',
    274 
    275     # iso2022_jp codec
    276     'csiso2022jp'        : 'iso2022_jp',
    277     'iso2022jp'          : 'iso2022_jp',
    278     'iso_2022_jp'        : 'iso2022_jp',
    279 
    280     # iso2022_jp_1 codec
    281     'iso2022jp_1'        : 'iso2022_jp_1',
    282     'iso_2022_jp_1'      : 'iso2022_jp_1',
    283 
    284     # iso2022_jp_2 codec
    285     'iso2022jp_2'        : 'iso2022_jp_2',
    286     'iso_2022_jp_2'      : 'iso2022_jp_2',
    287 
    288     # iso2022_jp_2004 codec
    289     'iso_2022_jp_2004'   : 'iso2022_jp_2004',
    290     'iso2022jp_2004'     : 'iso2022_jp_2004',
    291 
    292     # iso2022_jp_3 codec
    293     'iso2022jp_3'        : 'iso2022_jp_3',
    294     'iso_2022_jp_3'      : 'iso2022_jp_3',
    295 
    296     # iso2022_jp_ext codec
    297     'iso2022jp_ext'      : 'iso2022_jp_ext',
    298     'iso_2022_jp_ext'    : 'iso2022_jp_ext',
    299 
    300     # iso2022_kr codec
    301     'csiso2022kr'        : 'iso2022_kr',
    302     'iso2022kr'          : 'iso2022_kr',
    303     'iso_2022_kr'        : 'iso2022_kr',
    304 
    305     # iso8859_10 codec
    306     'csisolatin6'        : 'iso8859_10',
    307     'iso_8859_10'        : 'iso8859_10',
    308     'iso_8859_10_1992'   : 'iso8859_10',
    309     'iso_ir_157'         : 'iso8859_10',
    310     'l6'                 : 'iso8859_10',
    311     'latin6'             : 'iso8859_10',
    312 
    313     # iso8859_11 codec
    314     'thai'               : 'iso8859_11',
    315     'iso_8859_11'        : 'iso8859_11',
    316     'iso_8859_11_2001'   : 'iso8859_11',
    317 
    318     # iso8859_13 codec
    319     'iso_8859_13'        : 'iso8859_13',
    320     'l7'                 : 'iso8859_13',
    321     'latin7'             : 'iso8859_13',
    322 
    323     # iso8859_14 codec
    324     'iso_8859_14'        : 'iso8859_14',
    325     'iso_8859_14_1998'   : 'iso8859_14',
    326     'iso_celtic'         : 'iso8859_14',
    327     'iso_ir_199'         : 'iso8859_14',
    328     'l8'                 : 'iso8859_14',
    329     'latin8'             : 'iso8859_14',
    330 
    331     # iso8859_15 codec
    332     'iso_8859_15'        : 'iso8859_15',
    333     'l9'                 : 'iso8859_15',
    334     'latin9'             : 'iso8859_15',
    335 
    336     # iso8859_16 codec
    337     'iso_8859_16'        : 'iso8859_16',
    338     'iso_8859_16_2001'   : 'iso8859_16',
    339     'iso_ir_226'         : 'iso8859_16',
    340     'l10'                : 'iso8859_16',
    341     'latin10'            : 'iso8859_16',
    342 
    343     # iso8859_2 codec
    344     'csisolatin2'        : 'iso8859_2',
    345     'iso_8859_2'         : 'iso8859_2',
    346     'iso_8859_2_1987'    : 'iso8859_2',
    347     'iso_ir_101'         : 'iso8859_2',
    348     'l2'                 : 'iso8859_2',
    349     'latin2'             : 'iso8859_2',
    350 
    351     # iso8859_3 codec
    352     'csisolatin3'        : 'iso8859_3',
    353     'iso_8859_3'         : 'iso8859_3',
    354     'iso_8859_3_1988'    : 'iso8859_3',
    355     'iso_ir_109'         : 'iso8859_3',
    356     'l3'                 : 'iso8859_3',
    357     'latin3'             : 'iso8859_3',
    358 
    359     # iso8859_4 codec
    360     'csisolatin4'        : 'iso8859_4',
    361     'iso_8859_4'         : 'iso8859_4',
    362     'iso_8859_4_1988'    : 'iso8859_4',
    363     'iso_ir_110'         : 'iso8859_4',
    364     'l4'                 : 'iso8859_4',
    365     'latin4'             : 'iso8859_4',
    366 
    367     # iso8859_5 codec
    368     'csisolatincyrillic' : 'iso8859_5',
    369     'cyrillic'           : 'iso8859_5',
    370     'iso_8859_5'         : 'iso8859_5',
    371     'iso_8859_5_1988'    : 'iso8859_5',
    372     'iso_ir_144'         : 'iso8859_5',
    373 
    374     # iso8859_6 codec
    375     'arabic'             : 'iso8859_6',
    376     'asmo_708'           : 'iso8859_6',
    377     'csisolatinarabic'   : 'iso8859_6',
    378     'ecma_114'           : 'iso8859_6',
    379     'iso_8859_6'         : 'iso8859_6',
    380     'iso_8859_6_1987'    : 'iso8859_6',
    381     'iso_ir_127'         : 'iso8859_6',
    382 
    383     # iso8859_7 codec
    384     'csisolatingreek'    : 'iso8859_7',
    385     'ecma_118'           : 'iso8859_7',
    386     'elot_928'           : 'iso8859_7',
    387     'greek'              : 'iso8859_7',
    388     'greek8'             : 'iso8859_7',
    389     'iso_8859_7'         : 'iso8859_7',
    390     'iso_8859_7_1987'    : 'iso8859_7',
    391     'iso_ir_126'         : 'iso8859_7',
    392 
    393     # iso8859_8 codec
    394     'csisolatinhebrew'   : 'iso8859_8',
    395     'hebrew'             : 'iso8859_8',
    396     'iso_8859_8'         : 'iso8859_8',
    397     'iso_8859_8_1988'    : 'iso8859_8',
    398     'iso_ir_138'         : 'iso8859_8',
    399 
    400     # iso8859_9 codec
    401     'csisolatin5'        : 'iso8859_9',
    402     'iso_8859_9'         : 'iso8859_9',
    403     'iso_8859_9_1989'    : 'iso8859_9',
    404     'iso_ir_148'         : 'iso8859_9',
    405     'l5'                 : 'iso8859_9',
    406     'latin5'             : 'iso8859_9',
    407 
    408     # johab codec
    409     'cp1361'             : 'johab',
    410     'ms1361'             : 'johab',
    411 
    412     # koi8_r codec
    413     'cskoi8r'            : 'koi8_r',
    414 
    415     # kz1048 codec
    416     'kz_1048'           : 'kz1048',
    417     'rk1048'            : 'kz1048',
    418     'strk1048_2002'     : 'kz1048',
    419 
    420     # latin_1 codec
    421     #
    422     # Note that the latin_1 codec is implemented internally in C and a
    423     # lot faster than the charmap codec iso8859_1 which uses the same
    424     # encoding. This is why we discourage the use of the iso8859_1
    425     # codec and alias it to latin_1 instead.
    426     #
    427     '8859'               : 'latin_1',
    428     'cp819'              : 'latin_1',
    429     'csisolatin1'        : 'latin_1',
    430     'ibm819'             : 'latin_1',
    431     'iso8859'            : 'latin_1',
    432     'iso8859_1'          : 'latin_1',
    433     'iso_8859_1'         : 'latin_1',
    434     'iso_8859_1_1987'    : 'latin_1',
    435     'iso_ir_100'         : 'latin_1',
    436     'l1'                 : 'latin_1',
    437     'latin'              : 'latin_1',
    438     'latin1'             : 'latin_1',
    439 
    440     # mac_cyrillic codec
    441     'maccyrillic'        : 'mac_cyrillic',
    442 
    443     # mac_greek codec
    444     'macgreek'           : 'mac_greek',
    445 
    446     # mac_iceland codec
    447     'maciceland'         : 'mac_iceland',
    448 
    449     # mac_latin2 codec
    450     'maccentraleurope'   : 'mac_latin2',
    451     'maclatin2'          : 'mac_latin2',
    452 
    453     # mac_roman codec
    454     'macintosh'          : 'mac_roman',
    455     'macroman'           : 'mac_roman',
    456 
    457     # mac_turkish codec
    458     'macturkish'         : 'mac_turkish',
    459 
    460     # mbcs codec
    461     'ansi'               : 'mbcs',
    462     'dbcs'               : 'mbcs',
    463 
    464     # ptcp154 codec
    465     'csptcp154'          : 'ptcp154',
    466     'pt154'              : 'ptcp154',
    467     'cp154'              : 'ptcp154',
    468     'cyrillic_asian'     : 'ptcp154',
    469 
    470     # quopri_codec codec
    471     'quopri'             : 'quopri_codec',
    472     'quoted_printable'   : 'quopri_codec',
    473     'quotedprintable'    : 'quopri_codec',
    474 
    475     # rot_13 codec
    476     'rot13'              : 'rot_13',
    477 
    478     # shift_jis codec
    479     'csshiftjis'         : 'shift_jis',
    480     'shiftjis'           : 'shift_jis',
    481     'sjis'               : 'shift_jis',
    482     's_jis'              : 'shift_jis',
    483 
    484     # shift_jis_2004 codec
    485     'shiftjis2004'       : 'shift_jis_2004',
    486     'sjis_2004'          : 'shift_jis_2004',
    487     's_jis_2004'         : 'shift_jis_2004',
    488 
    489     # shift_jisx0213 codec
    490     'shiftjisx0213'      : 'shift_jisx0213',
    491     'sjisx0213'          : 'shift_jisx0213',
    492     's_jisx0213'         : 'shift_jisx0213',
    493 
    494     # tactis codec
    495     'tis260'             : 'tactis',
    496 
    497     # tis_620 codec
    498     'tis620'             : 'tis_620',
    499     'tis_620_0'          : 'tis_620',
    500     'tis_620_2529_0'     : 'tis_620',
    501     'tis_620_2529_1'     : 'tis_620',
    502     'iso_ir_166'         : 'tis_620',
    503 
    504     # utf_16 codec
    505     'u16'                : 'utf_16',
    506     'utf16'              : 'utf_16',
    507 
    508     # utf_16_be codec
    509     'unicodebigunmarked' : 'utf_16_be',
    510     'utf_16be'           : 'utf_16_be',
    511 
    512     # utf_16_le codec
    513     'unicodelittleunmarked' : 'utf_16_le',
    514     'utf_16le'           : 'utf_16_le',
    515 
    516     # utf_32 codec
    517     'u32'                : 'utf_32',
    518     'utf32'              : 'utf_32',
    519 
    520     # utf_32_be codec
    521     'utf_32be'           : 'utf_32_be',
    522 
    523     # utf_32_le codec
    524     'utf_32le'           : 'utf_32_le',
    525 
    526     # utf_7 codec
    527     'u7'                 : 'utf_7',
    528     'utf7'               : 'utf_7',
    529     'unicode_1_1_utf_7'  : 'utf_7',
    530 
    531     # utf_8 codec
    532     'u8'                 : 'utf_8',
    533     'utf'                : 'utf_8',
    534     'utf8'               : 'utf_8',
    535     'utf8_ucs2'          : 'utf_8',
    536     'utf8_ucs4'          : 'utf_8',
    537 
    538     # uu_codec codec
    539     'uu'                 : 'uu_codec',
    540 
    541     # zlib_codec codec
    542     'zip'                : 'zlib_codec',
    543     'zlib'               : 'zlib_codec',
    544 
    545     # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
    546     'x_mac_japanese'      : 'shift_jis',
    547     'x_mac_korean'        : 'euc_kr',
    548     'x_mac_simp_chinese'  : 'gb2312',
    549     'x_mac_trad_chinese'  : 'big5',
    550 }
    551