1 #! /usr/bin/env python 2 3 # Copyright 2016 Google Inc. 4 # 5 # Use of this source code is governed by a BSD-style license that can be 6 # found in the LICENSE file. 7 8 import array, zlib, base64, sys 9 10 # import array, zlib, base64, sys, unicodedata 11 # def codepoints(): 12 # for i in range(0x110000): 13 # try: 14 # u = ('\\U%08x' % i).decode('unicode-escape') 15 # n = unicodedata.name(u) 16 # c = unicodedata.category(u) 17 # if c[0] == 'C' or n.startswith('VARIATION '): 18 # continue 19 # yield i 20 # except ValueError: 21 # pass 22 # def make_unicode_data(): 23 # last = 0 24 # a = array.array('I') 25 # for i in codepoints(): 26 # a.append(i - last - 1) 27 # last = i 28 # return base64.b64encode(zlib.compress(a.tostring(), 9)) 29 # if __name__ == '__main__': 30 # b = make_unicode_data() 31 # for i in range(0, len(b), 76): 32 # print ' %s' % b[i:i+76] 33 valid_codepoint_data = ''' 34 eNrt3LmOHEUYAOCt7p5jL89iAbZJwJY4ExKTIiJChAh4CgSvgIgREjEJEa+ARAQxEW/gR4GRbDGa 35 7Z4+pq+q/j5p5fVMd3XVX3fPTr99wVSedjgnCBsAANyTlby2qllX9722tlYH4DqieTKG+etV3tYJ 36 t5ntiNcqWxu9NnCbW+r6yLpQnaewn1rS3pFpPVhQWVcL7Dd/GVt6m1PGHL/WDfMXGsZlqLyHmvfy 37 DjGsyvN+3V6cuG42wrhRVd5i4jiX/ZtVnFeW1309bUrS3fS4Pw8tyxlmEMsu7auqDPvzd2ekuYuo 38 358Tw7K8b4/abh/9/66mLPmJ8uYHr4ej1/KKNJooWuYjezlXDLGOW7dob2P02zZph5o4ti3D5uj1 39 dce2t23Y9lOId9byvKqYbyrafZt87SKN60WDsaltOS5L2mE2YH76vi+ZXwyr6Tolq8ljceLcums8 40 PPj9ZiF7uGKCa75bUp/hYF5tu+Y5rN9z2n2o6Zuhh33qq+OeXTDkWB1zPykii00Y6dz3dQu45yrC 41 POeqDaDx2qju79u63q+JpYxLiHvQrhetmOi6Y63H1qq4d9dCAABAIvuFlO162DuPtQ+47Dm9mzPL 42 Hs5IZ86yRPK37vm4tn2hr2tQbxtRXleqq9QUnyH38XfadcfHsk7JRp7PhzDWvev1gG3MOiIdYeZx 43 H3qN9JUmMMvxa6n78LZ5/Tzx9pYvpN7rfDmjvNwaBmGW3hQCYIL9W5ZYfsPRz9zyH7MwwbnZAHk5 44 dY3Qc1nGui+fwv3/uwn6TlldjnF/+tEM4r27gHo+ZwYASHM/UOVJi2PfUJUAAFEIPadVjJzfqZ+9 45 EmZWl1XPmD8ltc8s85nWeWh5rO+kAgAApKvpnu+3iMsY5J2F20SY56tE62L/LLC7gdPvMp6k8B7N 46 fWEOS8aNEEDv3mp5fLHweBnrh+F5ZPMVw3difbZvPCwzt2fP3qhrQN8DAAAAAAAAAAAAAAAAAAAA 47 AAAAAAAAAAAAAKCDKyEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 48 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACIwnMhAAAAAOBInnDZLlUvwChe 49 n/j6WU/pbEfO91bTEV9m5dsEylA0PG6t/y9O273ReqZtl2auJ7puLvSdhA595R3jVec9QTZx+Yx3 50 /3tc8tqLSMvStF2tVTsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABA 51 JzcDpl0kEJ/iqTYCxCVTbmbsvZZrktVI+QoVr4Wj94MqjIplHEA87qzhk/RBx/MeNjxufeK9yxnF 52 IQyYVjHw9UhLIQQsUN5y7m3yet5xzL/tOO+PNa4PMV+dk2bX9dEzzX4xVpHPcbkqHKw9NPE80nL+ 53 oK128vGM9oM3I177kwT2wFPvhT4yvDKBpuvgr/Pq82O7NxQO5pxshOuc8mvC8+x+vfRnRUxWLWI3 54 ZvsqWo7l2wbHbCZu798nOnZNsWbMRu4/S6zXPj2a8IOwsMA/KDseqz/daYMAAABz97MQzMbt0f9/ 55 vBYTAAAAYL7++UUMIDVXM86bZ3s1sxWC2fBd03J/CMFg+nrGyU9C2TvPYKmPTXYw12dnxi/UnKM+ 56 7scgO6MNh4nzXvX8zlwbALBPB4xHsXkcV3Y9U3Jevmlx7Gri/nr9cl+Wvdy7le3f9ve5989leHJ0 57 7+CqJL3v/vuZy9eYHzQ45vdb7RUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 58 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 59 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 60 AAAAAAAAAAAAAAAAAIB2PhQCAAAAAAAAAAAAAAAAAAAAYAB/fyYGAMA4/gXQcRfM 61 ''' 62 def codepoints(): 63 i = 0 64 for increment in array.array('I', zlib.decompress( 65 base64.b64decode(valid_codepoint_data))).tolist(): 66 i += increment + 1 67 yield i 68 if sys.version_info[0] < 3: 69 def to_unicode(i): 70 return ('\\U%08x' % i).decode('unicode-escape').encode('UTF-8') 71 else: 72 def to_unicode(i): 73 return chr(i) 74 if __name__ == '__main__': 75 o = sys.stdout 76 o.write(to_unicode(0xFEFF)) 77 last_row = -1 78 for i in codepoints(): 79 row = i - (i & 63) 80 if last_row != row: 81 if row: 82 o.write('\n' if row % 1024 else '\n\n') 83 o.write('U+%06x ' % row) 84 last_row = row 85 o.write(' ' + to_unicode(i)) 86 o.write('\n') 87