"""This script generates a Python codec module from a Windows Code Page.

It uses the function MultiByteToWideChar to generate a decoding table.

Usage:
    python Tools/unicode/genwincodec.py <codepage>
"""

import ctypes
from ctypes import wintypes
from gencodec import codegen
import unicodedata


def genwinmap(codepage):
    """Build the decoding map of a single-byte Windows code page.

    Every byte value 0..255 is decoded on its own with the Win32 function
    MultiByteToWideChar.

    Args:
        codepage: Windows code page identifier (an integer, e.g. 1252).

    Returns:
        A dict mapping each byte value (0..255) to a tuple
        ``(code_point, name)``.  ``name`` is the Unicode character name;
        when the Unicode database has no name for the character it falls
        back to 'CONTROL CHARACTER' for bytes 0..31 and 127, and to ''
        for every other byte.

    Raises:
        ValueError: if MultiByteToWideChar does not produce exactly one
            character for a byte (reported as "invalid code page").
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    enc2uni = {}

    # Pre-seed the C0 controls and DEL: unicodedata has no names for them,
    # so these entries supply the fallback name used in the loop below.
    # list(...) is required because range() is not a list on Python 3.
    for i in list(range(32)) + [127]:
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for i in range(256):
        buf = ctypes.create_unicode_buffer(2)
        # LPCSTR needs a byte string.  bytes(bytearray(...)) yields a
        # one-byte string on both Python 2 and Python 3, whereas chr(i)
        # is a text string on Python 3.
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes(bytearray((i,))), 1,
            buf, 2)
        # Raise explicitly instead of asserting: asserts vanish under -O.
        if ret != 1:
            raise ValueError("invalid code page")
        # Internal sanity check: only one character was written, so the
        # second slot of the zero-initialised buffer must still be NUL.
        assert buf[1] == '\x00'
        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            # Unnamed character: reuse the pre-seeded name if there is one.
            try:
                name = enc2uni[i][1]
            except KeyError:
                name = ''

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni


def genwincodec(codepage):
    """Print the source of a codec module for a Windows code page.

    The decoding map from genwinmap() is passed to gencodec.codegen() under
    the encoding name 'cp<codepage>'.  Everything in the generated code up
    to and including the first '\"\"\"#\"' marker is replaced by a docstring
    recording the encoding name, the Windows version and the command line,
    and the result is written to standard output.

    Args:
        codepage: Windows code page identifier (an integer, e.g. 1252).
    """
    import platform
    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    code = codegen("", decoding_map, encodingname)
    # Replace first lines with our own docstring
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
  python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, ' '.join(platform.win32_ver()), codepage)
    code = header + code.split('"""#"', 1)[1]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(code)


if __name__ == '__main__':
    import sys
    genwincodec(int(sys.argv[1]))