Home | History | Annotate | Download | only in encodings
      1 """ Encoding Aliases Support
      2 
      3     This module is used by the encodings package search function to
      4     map encoding names to module names.
      5 
      6     Note that the search function normalizes the encoding names before
      7     doing the lookup, so the mapping will have to map normalized
      8     encoding names to module names.
      9 
     10     Contents:
     11 
     12         The following aliases dictionary contains mappings of all IANA
     13         character set names for which the Python core library provides
     14         codecs. In addition to these, a few Python specific codec
     15         aliases have also been added.
     16 
     17 """
     18 aliases = {
     19 
     20     # Please keep this list sorted alphabetically by value !
     21 
     22     # ascii codec
     23     '646'                : 'ascii',
     24     'ansi_x3.4_1968'     : 'ascii',
     25     'ansi_x3_4_1968'     : 'ascii', # some email headers use this non-standard name
     26     'ansi_x3.4_1986'     : 'ascii',
     27     'cp367'              : 'ascii',
     28     'csascii'            : 'ascii',
     29     'ibm367'             : 'ascii',
     30     'iso646_us'          : 'ascii',
     31     'iso_646.irv_1991'   : 'ascii',
     32     'iso_ir_6'           : 'ascii',
     33     'us'                 : 'ascii',
     34     'us_ascii'           : 'ascii',
     35 
     36     # base64_codec codec
     37     'base64'             : 'base64_codec',
     38     'base_64'            : 'base64_codec',
     39 
     40     # big5 codec
     41     'big5_tw'            : 'big5',
     42     'csbig5'             : 'big5',
     43 
     44     # big5hkscs codec
     45     'big5_hkscs'         : 'big5hkscs',
     46     'hkscs'              : 'big5hkscs',
     47 
     48     # bz2_codec codec
     49     'bz2'                : 'bz2_codec',
     50 
     51     # cp037 codec
     52     '037'                : 'cp037',
     53     'csibm037'           : 'cp037',
     54     'ebcdic_cp_ca'       : 'cp037',
     55     'ebcdic_cp_nl'       : 'cp037',
     56     'ebcdic_cp_us'       : 'cp037',
     57     'ebcdic_cp_wt'       : 'cp037',
     58     'ibm037'             : 'cp037',
     59     'ibm039'             : 'cp037',
     60 
     61     # cp1026 codec
     62     '1026'               : 'cp1026',
     63     'csibm1026'          : 'cp1026',
     64     'ibm1026'            : 'cp1026',
     65 
     66     # cp1140 codec
     67     '1140'               : 'cp1140',
     68     'ibm1140'            : 'cp1140',
     69 
     70     # cp1250 codec
     71     '1250'               : 'cp1250',
     72     'windows_1250'       : 'cp1250',
     73 
     74     # cp1251 codec
     75     '1251'               : 'cp1251',
     76     'windows_1251'       : 'cp1251',
     77 
     78     # cp1252 codec
     79     '1252'               : 'cp1252',
     80     'windows_1252'       : 'cp1252',
     81 
     82     # cp1253 codec
     83     '1253'               : 'cp1253',
     84     'windows_1253'       : 'cp1253',
     85 
     86     # cp1254 codec
     87     '1254'               : 'cp1254',
     88     'windows_1254'       : 'cp1254',
     89 
     90     # cp1255 codec
     91     '1255'               : 'cp1255',
     92     'windows_1255'       : 'cp1255',
     93 
     94     # cp1256 codec
     95     '1256'               : 'cp1256',
     96     'windows_1256'       : 'cp1256',
     97 
     98     # cp1257 codec
     99     '1257'               : 'cp1257',
    100     'windows_1257'       : 'cp1257',
    101 
    102     # cp1258 codec
    103     '1258'               : 'cp1258',
    104     'windows_1258'       : 'cp1258',
    105 
    106     # cp424 codec
    107     '424'                : 'cp424',
    108     'csibm424'           : 'cp424',
    109     'ebcdic_cp_he'       : 'cp424',
    110     'ibm424'             : 'cp424',
    111 
    112     # cp437 codec
    113     '437'                : 'cp437',
    114     'cspc8codepage437'   : 'cp437',
    115     'ibm437'             : 'cp437',
    116 
    117     # cp500 codec
    118     '500'                : 'cp500',
    119     'csibm500'           : 'cp500',
    120     'ebcdic_cp_be'       : 'cp500',
    121     'ebcdic_cp_ch'       : 'cp500',
    122     'ibm500'             : 'cp500',
    123 
    124     # cp775 codec
    125     '775'                : 'cp775',
    126     'cspc775baltic'      : 'cp775',
    127     'ibm775'             : 'cp775',
    128 
    129     # cp850 codec
    130     '850'                : 'cp850',
    131     'cspc850multilingual' : 'cp850',
    132     'ibm850'             : 'cp850',
    133 
    134     # cp852 codec
    135     '852'                : 'cp852',
    136     'cspcp852'           : 'cp852',
    137     'ibm852'             : 'cp852',
    138 
    139     # cp855 codec
    140     '855'                : 'cp855',
    141     'csibm855'           : 'cp855',
    142     'ibm855'             : 'cp855',
    143 
    144     # cp857 codec
    145     '857'                : 'cp857',
    146     'csibm857'           : 'cp857',
    147     'ibm857'             : 'cp857',
    148 
    149     # cp858 codec
    150     '858'                : 'cp858',
    151     'csibm858'           : 'cp858',
    152     'ibm858'             : 'cp858',
    153 
    154     # cp860 codec
    155     '860'                : 'cp860',
    156     'csibm860'           : 'cp860',
    157     'ibm860'             : 'cp860',
    158 
    159     # cp861 codec
    160     '861'                : 'cp861',
    161     'cp_is'              : 'cp861',
    162     'csibm861'           : 'cp861',
    163     'ibm861'             : 'cp861',
    164 
    165     # cp862 codec
    166     '862'                : 'cp862',
    167     'cspc862latinhebrew' : 'cp862',
    168     'ibm862'             : 'cp862',
    169 
    170     # cp863 codec
    171     '863'                : 'cp863',
    172     'csibm863'           : 'cp863',
    173     'ibm863'             : 'cp863',
    174 
    175     # cp864 codec
    176     '864'                : 'cp864',
    177     'csibm864'           : 'cp864',
    178     'ibm864'             : 'cp864',
    179 
    180     # cp865 codec
    181     '865'                : 'cp865',
    182     'csibm865'           : 'cp865',
    183     'ibm865'             : 'cp865',
    184 
    185     # cp866 codec
    186     '866'                : 'cp866',
    187     'csibm866'           : 'cp866',
    188     'ibm866'             : 'cp866',
    189 
    190     # cp869 codec
    191     '869'                : 'cp869',
    192     'cp_gr'              : 'cp869',
    193     'csibm869'           : 'cp869',
    194     'ibm869'             : 'cp869',
    195 
    196     # cp932 codec
    197     '932'                : 'cp932',
    198     'ms932'              : 'cp932',
    199     'mskanji'            : 'cp932',
    200     'ms_kanji'           : 'cp932',
    201 
    202     # cp949 codec
    203     '949'                : 'cp949',
    204     'ms949'              : 'cp949',
    205     'uhc'                : 'cp949',
    206 
    207     # cp950 codec
    208     '950'                : 'cp950',
    209     'ms950'              : 'cp950',
    210 
    211     # euc_jis_2004 codec
    212     'jisx0213'           : 'euc_jis_2004',
    213     'eucjis2004'         : 'euc_jis_2004',
    214     'euc_jis2004'        : 'euc_jis_2004',
    215 
    216     # euc_jisx0213 codec
    217     'eucjisx0213'        : 'euc_jisx0213',
    218 
    219     # euc_jp codec
    220     'eucjp'              : 'euc_jp',
    221     'ujis'               : 'euc_jp',
    222     'u_jis'              : 'euc_jp',
    223 
    224     # euc_kr codec
    225     'euckr'              : 'euc_kr',
    226     'korean'             : 'euc_kr',
    227     'ksc5601'            : 'euc_kr',
    228     'ks_c_5601'          : 'euc_kr',
    229     'ks_c_5601_1987'     : 'euc_kr',
    230     'ksx1001'            : 'euc_kr',
    231     'ks_x_1001'          : 'euc_kr',
    232 
    233     # gb18030 codec
    234     'gb18030_2000'       : 'gb18030',
    235 
    236     # gb2312 codec
    237     'chinese'            : 'gb2312',
    238     'csiso58gb231280'    : 'gb2312',
    239     'euc_cn'             : 'gb2312',
    240     'euccn'              : 'gb2312',
    241     'eucgb2312_cn'       : 'gb2312',
    242     'gb2312_1980'        : 'gb2312',
    243     'gb2312_80'          : 'gb2312',
    244     'iso_ir_58'          : 'gb2312',
    245 
    246     # gbk codec
    247     '936'                : 'gbk',
    248     'cp936'              : 'gbk',
    249     'ms936'              : 'gbk',
    250 
    251     # hex_codec codec
    252     'hex'                : 'hex_codec',
    253 
    254     # hp_roman8 codec
    255     'roman8'             : 'hp_roman8',
    256     'r8'                 : 'hp_roman8',
    257     'csHPRoman8'         : 'hp_roman8',
    258 
    259     # hz codec
    260     'hzgb'               : 'hz',
    261     'hz_gb'              : 'hz',
    262     'hz_gb_2312'         : 'hz',
    263 
    264     # iso2022_jp codec
    265     'csiso2022jp'        : 'iso2022_jp',
    266     'iso2022jp'          : 'iso2022_jp',
    267     'iso_2022_jp'        : 'iso2022_jp',
    268 
    269     # iso2022_jp_1 codec
    270     'iso2022jp_1'        : 'iso2022_jp_1',
    271     'iso_2022_jp_1'      : 'iso2022_jp_1',
    272 
    273     # iso2022_jp_2 codec
    274     'iso2022jp_2'        : 'iso2022_jp_2',
    275     'iso_2022_jp_2'      : 'iso2022_jp_2',
    276 
    277     # iso2022_jp_2004 codec
    278     'iso_2022_jp_2004'   : 'iso2022_jp_2004',
    279     'iso2022jp_2004'     : 'iso2022_jp_2004',
    280 
    281     # iso2022_jp_3 codec
    282     'iso2022jp_3'        : 'iso2022_jp_3',
    283     'iso_2022_jp_3'      : 'iso2022_jp_3',
    284 
    285     # iso2022_jp_ext codec
    286     'iso2022jp_ext'      : 'iso2022_jp_ext',
    287     'iso_2022_jp_ext'    : 'iso2022_jp_ext',
    288 
    289     # iso2022_kr codec
    290     'csiso2022kr'        : 'iso2022_kr',
    291     'iso2022kr'          : 'iso2022_kr',
    292     'iso_2022_kr'        : 'iso2022_kr',
    293 
    294     # iso8859_10 codec
    295     'csisolatin6'        : 'iso8859_10',
    296     'iso_8859_10'        : 'iso8859_10',
    297     'iso_8859_10_1992'   : 'iso8859_10',
    298     'iso_ir_157'         : 'iso8859_10',
    299     'l6'                 : 'iso8859_10',
    300     'latin6'             : 'iso8859_10',
    301 
    302     # iso8859_11 codec
    303     'thai'               : 'iso8859_11',
    304     'iso_8859_11'        : 'iso8859_11',
    305     'iso_8859_11_2001'   : 'iso8859_11',
    306 
    307     # iso8859_13 codec
    308     'iso_8859_13'        : 'iso8859_13',
    309     'l7'                 : 'iso8859_13',
    310     'latin7'             : 'iso8859_13',
    311 
    312     # iso8859_14 codec
    313     'iso_8859_14'        : 'iso8859_14',
    314     'iso_8859_14_1998'   : 'iso8859_14',
    315     'iso_celtic'         : 'iso8859_14',
    316     'iso_ir_199'         : 'iso8859_14',
    317     'l8'                 : 'iso8859_14',
    318     'latin8'             : 'iso8859_14',
    319 
    320     # iso8859_15 codec
    321     'iso_8859_15'        : 'iso8859_15',
    322     'l9'                 : 'iso8859_15',
    323     'latin9'             : 'iso8859_15',
    324 
    325     # iso8859_16 codec
    326     'iso_8859_16'        : 'iso8859_16',
    327     'iso_8859_16_2001'   : 'iso8859_16',
    328     'iso_ir_226'         : 'iso8859_16',
    329     'l10'                : 'iso8859_16',
    330     'latin10'            : 'iso8859_16',
    331 
    332     # iso8859_2 codec
    333     'csisolatin2'        : 'iso8859_2',
    334     'iso_8859_2'         : 'iso8859_2',
    335     'iso_8859_2_1987'    : 'iso8859_2',
    336     'iso_ir_101'         : 'iso8859_2',
    337     'l2'                 : 'iso8859_2',
    338     'latin2'             : 'iso8859_2',
    339 
    340     # iso8859_3 codec
    341     'csisolatin3'        : 'iso8859_3',
    342     'iso_8859_3'         : 'iso8859_3',
    343     'iso_8859_3_1988'    : 'iso8859_3',
    344     'iso_ir_109'         : 'iso8859_3',
    345     'l3'                 : 'iso8859_3',
    346     'latin3'             : 'iso8859_3',
    347 
    348     # iso8859_4 codec
    349     'csisolatin4'        : 'iso8859_4',
    350     'iso_8859_4'         : 'iso8859_4',
    351     'iso_8859_4_1988'    : 'iso8859_4',
    352     'iso_ir_110'         : 'iso8859_4',
    353     'l4'                 : 'iso8859_4',
    354     'latin4'             : 'iso8859_4',
    355 
    356     # iso8859_5 codec
    357     'csisolatincyrillic' : 'iso8859_5',
    358     'cyrillic'           : 'iso8859_5',
    359     'iso_8859_5'         : 'iso8859_5',
    360     'iso_8859_5_1988'    : 'iso8859_5',
    361     'iso_ir_144'         : 'iso8859_5',
    362 
    363     # iso8859_6 codec
    364     'arabic'             : 'iso8859_6',
    365     'asmo_708'           : 'iso8859_6',
    366     'csisolatinarabic'   : 'iso8859_6',
    367     'ecma_114'           : 'iso8859_6',
    368     'iso_8859_6'         : 'iso8859_6',
    369     'iso_8859_6_1987'    : 'iso8859_6',
    370     'iso_ir_127'         : 'iso8859_6',
    371 
    372     # iso8859_7 codec
    373     'csisolatingreek'    : 'iso8859_7',
    374     'ecma_118'           : 'iso8859_7',
    375     'elot_928'           : 'iso8859_7',
    376     'greek'              : 'iso8859_7',
    377     'greek8'             : 'iso8859_7',
    378     'iso_8859_7'         : 'iso8859_7',
    379     'iso_8859_7_1987'    : 'iso8859_7',
    380     'iso_ir_126'         : 'iso8859_7',
    381 
    382     # iso8859_8 codec
    383     'csisolatinhebrew'   : 'iso8859_8',
    384     'hebrew'             : 'iso8859_8',
    385     'iso_8859_8'         : 'iso8859_8',
    386     'iso_8859_8_1988'    : 'iso8859_8',
    387     'iso_ir_138'         : 'iso8859_8',
    388 
    389     # iso8859_9 codec
    390     'csisolatin5'        : 'iso8859_9',
    391     'iso_8859_9'         : 'iso8859_9',
    392     'iso_8859_9_1989'    : 'iso8859_9',
    393     'iso_ir_148'         : 'iso8859_9',
    394     'l5'                 : 'iso8859_9',
    395     'latin5'             : 'iso8859_9',
    396 
    397     # johab codec
    398     'cp1361'             : 'johab',
    399     'ms1361'             : 'johab',
    400 
    401     # koi8_r codec
    402     'cskoi8r'            : 'koi8_r',
    403 
    404     # latin_1 codec
    405     #
    406     # Note that the latin_1 codec is implemented internally in C and a
    407     # lot faster than the charmap codec iso8859_1 which uses the same
    408     # encoding. This is why we discourage the use of the iso8859_1
    409     # codec and alias it to latin_1 instead.
    410     #
    411     '8859'               : 'latin_1',
    412     'cp819'              : 'latin_1',
    413     'csisolatin1'        : 'latin_1',
    414     'ibm819'             : 'latin_1',
    415     'iso8859'            : 'latin_1',
    416     'iso8859_1'          : 'latin_1',
    417     'iso_8859_1'         : 'latin_1',
    418     'iso_8859_1_1987'    : 'latin_1',
    419     'iso_ir_100'         : 'latin_1',
    420     'l1'                 : 'latin_1',
    421     'latin'              : 'latin_1',
    422     'latin1'             : 'latin_1',
    423 
    424     # mac_cyrillic codec
    425     'maccyrillic'        : 'mac_cyrillic',
    426 
    427     # mac_greek codec
    428     'macgreek'           : 'mac_greek',
    429 
    430     # mac_iceland codec
    431     'maciceland'         : 'mac_iceland',
    432 
    433     # mac_latin2 codec
    434     'maccentraleurope'   : 'mac_latin2',
    435     'maclatin2'          : 'mac_latin2',
    436 
    437     # mac_roman codec
    438     'macroman'           : 'mac_roman',
    439 
    440     # mac_turkish codec
    441     'macturkish'         : 'mac_turkish',
    442 
    443     # mbcs codec
    444     'dbcs'               : 'mbcs',
    445 
    446     # ptcp154 codec
    447     'csptcp154'          : 'ptcp154',
    448     'pt154'              : 'ptcp154',
    449     'cp154'              : 'ptcp154',
    450     'cyrillic_asian'     : 'ptcp154',
    451 
    452     # quopri_codec codec
    453     'quopri'             : 'quopri_codec',
    454     'quoted_printable'   : 'quopri_codec',
    455     'quotedprintable'    : 'quopri_codec',
    456 
    457     # rot_13 codec
    458     'rot13'              : 'rot_13',
    459 
    460     # shift_jis codec
    461     'csshiftjis'         : 'shift_jis',
    462     'shiftjis'           : 'shift_jis',
    463     'sjis'               : 'shift_jis',
    464     's_jis'              : 'shift_jis',
    465 
    466     # shift_jis_2004 codec
    467     'shiftjis2004'       : 'shift_jis_2004',
    468     'sjis_2004'          : 'shift_jis_2004',
    469     's_jis_2004'         : 'shift_jis_2004',
    470 
    471     # shift_jisx0213 codec
    472     'shiftjisx0213'      : 'shift_jisx0213',
    473     'sjisx0213'          : 'shift_jisx0213',
    474     's_jisx0213'         : 'shift_jisx0213',
    475 
    476     # tactis codec
    477     'tis260'             : 'tactis',
    478 
    479     # tis_620 codec
    480     'tis620'             : 'tis_620',
    481     'tis_620_0'          : 'tis_620',
    482     'tis_620_2529_0'     : 'tis_620',
    483     'tis_620_2529_1'     : 'tis_620',
    484     'iso_ir_166'         : 'tis_620',
    485 
    486     # utf_16 codec
    487     'u16'                : 'utf_16',
    488     'utf16'              : 'utf_16',
    489 
    490     # utf_16_be codec
    491     'unicodebigunmarked' : 'utf_16_be',
    492     'utf_16be'           : 'utf_16_be',
    493 
    494     # utf_16_le codec
    495     'unicodelittleunmarked' : 'utf_16_le',
    496     'utf_16le'           : 'utf_16_le',
    497 
    498     # utf_32 codec
    499     'u32'                : 'utf_32',
    500     'utf32'              : 'utf_32',
    501 
    502     # utf_32_be codec
    503     'utf_32be'           : 'utf_32_be',
    504 
    505     # utf_32_le codec
    506     'utf_32le'           : 'utf_32_le',
    507 
    508     # utf_7 codec
    509     'u7'                 : 'utf_7',
    510     'utf7'               : 'utf_7',
    511     'unicode_1_1_utf_7'  : 'utf_7',
    512 
    513     # utf_8 codec
    514     'u8'                 : 'utf_8',
    515     'utf'                : 'utf_8',
    516     'utf8'               : 'utf_8',
    517     'utf8_ucs2'          : 'utf_8',
    518     'utf8_ucs4'          : 'utf_8',
    519 
    520     # uu_codec codec
    521     'uu'                 : 'uu_codec',
    522 
    523     # zlib_codec codec
    524     'zip'                : 'zlib_codec',
    525     'zlib'               : 'zlib_codec',
    526 
    527 }
    528