Home | History | Annotate | Download | only in unicode
      1 """This script generates a Python codec module from a Windows Code Page.
      2 
      3 It uses the function MultiByteToWideChar to generate a decoding table.
      4 """
      5 
      6 import ctypes
      7 from ctypes import wintypes
      8 from gencodec import codegen
      9 import unicodedata
     10 
     11 def genwinmap(codepage):
     12     MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
     13     MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
     14                                     wintypes.LPCSTR, ctypes.c_int,
     15                                     wintypes.LPWSTR, ctypes.c_int]
     16     MultiByteToWideChar.restype = ctypes.c_int
     17 
     18     enc2uni = {}
     19 
     20     for i in range(32) + [127]:
     21         enc2uni[i] = (i, 'CONTROL CHARACTER')
     22 
     23     for i in range(256):
     24         buf = ctypes.create_unicode_buffer(2)
     25         ret = MultiByteToWideChar(
     26             codepage, 0,
     27             chr(i), 1,
     28             buf, 2)
     29         assert ret == 1, "invalid code page"
     30         assert buf[1] == '\x00'
     31         try:
     32             name = unicodedata.name(buf[0])
     33         except ValueError:
     34             try:
     35                 name = enc2uni[i][1]
     36             except KeyError:
     37                 name = ''
     38 
     39         enc2uni[i] = (ord(buf[0]), name)
     40 
     41     return enc2uni
     42 
     43 def genwincodec(codepage):
     44     import platform
     45     map = genwinmap(codepage)
     46     encodingname = 'cp%d' % codepage
     47     code = codegen("", map, encodingname)
     48     # Replace first lines with our own docstring
     49     code = '''\
     50 """Python Character Mapping Codec %s generated on Windows:
     51 %s with the command:
     52   python Tools/unicode/genwincodec.py %s
     53 """#"
     54 ''' % (encodingname, ' '.join(platform.win32_ver()), codepage
     55       ) + code.split('"""#"', 1)[1]
     56 
     57     print code
     58 
     59 if __name__ == '__main__':
     60     import sys
     61     genwincodec(int(sys.argv[1]))
     62