Home | History | Annotate | Download | only in tools
      1 #! /usr/bin/env python
      2 
      3 # Copyright 2016 Google Inc.
      4 #
      5 # Use of this source code is governed by a BSD-style license that can be
      6 # found in the LICENSE file.
      7 
      8 import array, zlib, base64, sys
      9 
     10 #   import array, zlib, base64, sys, unicodedata
     11 #   def codepoints():
     12 #       for i in range(0x110000):
     13 #           try:
     14 #               u = ('\\U%08x' % i).decode('unicode-escape')
     15 #               n = unicodedata.name(u)
     16 #               c = unicodedata.category(u)
     17 #               if c[0] == 'C' or n.startswith('VARIATION '):
     18 #                   continue
     19 #               yield i
     20 #           except ValueError:
     21 #               pass
     22 #   def make_unicode_data():
     23 #       last = 0
     24 #       a = array.array('I')
     25 #       for i in codepoints():
     26 #         a.append(i - last - 1)
     27 #         last = i
     28 #       return base64.b64encode(zlib.compress(a.tostring(), 9))
     29 #   if __name__ == '__main__':
     30 #       b = make_unicode_data()
     31 #       for i in range(0, len(b), 76):
     32 #           print '  %s' % b[i:i+76]
     33 valid_codepoint_data = '''
     34   eNrt3LmOHEUYAOCt7p5jL89iAbZJwJY4ExKTIiJChAh4CgSvgIgREjEJEa+ARAQxEW/gR4GRbDGa
     35   7Z4+pq+q/j5p5fVMd3XVX3fPTr99wVSedjgnCBsAANyTlby2qllX9722tlYH4DqieTKG+etV3tYJ
     36   t5ntiNcqWxu9NnCbW+r6yLpQnaewn1rS3pFpPVhQWVcL7Dd/GVt6m1PGHL/WDfMXGsZlqLyHmvfy
     37   DjGsyvN+3V6cuG42wrhRVd5i4jiX/ZtVnFeW1309bUrS3fS4Pw8tyxlmEMsu7auqDPvzd2ekuYuo
     38   358Tw7K8b4/abh/9/66mLPmJ8uYHr4ej1/KKNJooWuYjezlXDLGOW7dob2P02zZph5o4ti3D5uj1
     39   dce2t23Y9lOId9byvKqYbyrafZt87SKN60WDsaltOS5L2mE2YH76vi+ZXwyr6Tolq8ljceLcums8
     40   PPj9ZiF7uGKCa75bUp/hYF5tu+Y5rN9z2n2o6Zuhh33qq+OeXTDkWB1zPykii00Y6dz3dQu45yrC
     41   POeqDaDx2qju79u63q+JpYxLiHvQrhetmOi6Y63H1qq4d9dCAABAIvuFlO162DuPtQ+47Dm9mzPL
     42   Hs5IZ86yRPK37vm4tn2hr2tQbxtRXleqq9QUnyH38XfadcfHsk7JRp7PhzDWvev1gG3MOiIdYeZx
     43   H3qN9JUmMMvxa6n78LZ5/Tzx9pYvpN7rfDmjvNwaBmGW3hQCYIL9W5ZYfsPRz9zyH7MwwbnZAHk5
     44   dY3Qc1nGui+fwv3/uwn6TlldjnF/+tEM4r27gHo+ZwYASHM/UOVJi2PfUJUAAFEIPadVjJzfqZ+9
     45   EmZWl1XPmD8ltc8s85nWeWh5rO+kAgAApKvpnu+3iMsY5J2F20SY56tE62L/LLC7gdPvMp6k8B7N
     46   fWEOS8aNEEDv3mp5fLHweBnrh+F5ZPMVw3difbZvPCwzt2fP3qhrQN8DAAAAAAAAAAAAAAAAAAAA
     47   AAAAAAAAAAAAAKCDKyEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
     48   AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACIwnMhAAAAAOBInnDZLlUvwChe
     49   n/j6WU/pbEfO91bTEV9m5dsEylA0PG6t/y9O273ReqZtl2auJ7puLvSdhA595R3jVec9QTZx+Yx3
     50   /3tc8tqLSMvStF2tVTsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABA
     51   JzcDpl0kEJ/iqTYCxCVTbmbsvZZrktVI+QoVr4Wj94MqjIplHEA87qzhk/RBx/MeNjxufeK9yxnF
     52   IQyYVjHw9UhLIQQsUN5y7m3yet5xzL/tOO+PNa4PMV+dk2bX9dEzzX4xVpHPcbkqHKw9NPE80nL+
     53   oK128vGM9oM3I177kwT2wFPvhT4yvDKBpuvgr/Pq82O7NxQO5pxshOuc8mvC8+x+vfRnRUxWLWI3
     54   ZvsqWo7l2wbHbCZu798nOnZNsWbMRu4/S6zXPj2a8IOwsMA/KDseqz/daYMAAABz97MQzMbt0f9/
     55   vBYTAAAAYL7++UUMIDVXM86bZ3s1sxWC2fBd03J/CMFg+nrGyU9C2TvPYKmPTXYw12dnxi/UnKM+
     56   7scgO6MNh4nzXvX8zlwbALBPB4xHsXkcV3Y9U3Jevmlx7Gri/nr9cl+Wvdy7le3f9ve5989leHJ0
     57   7+CqJL3v/vuZy9eYHzQ45vdb7RUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
     58   AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
     59   AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
     60   AAAAAAAAAAAAAAAAAIB2PhQCAAAAAAAAAAAAAAAAAAAAYAB/fyYGAMA4/gXQcRfM
     61 '''
     62 def codepoints():
     63     i = 0
     64     for increment in array.array('I', zlib.decompress(
     65         base64.b64decode(valid_codepoint_data))).tolist():
     66         i += increment + 1
     67         yield i
     68 if sys.version_info[0] < 3:
     69     def to_unicode(i):
     70         return ('\\U%08x' % i).decode('unicode-escape').encode('UTF-8')
     71 else:
     72     def to_unicode(i):
     73         return chr(i)
     74 if __name__ == '__main__':
     75     o = sys.stdout
     76     o.write(to_unicode(0xFEFF))
     77     last_row = -1
     78     for i in codepoints():
     79         row = i - (i & 63)
     80         if last_row != row:
     81             if row:
     82                 o.write('\n' if row % 1024 else '\n\n')
     83             o.write('U+%06x ' % row)
     84             last_row = row
     85         o.write(' ' + to_unicode(i))
     86     o.write('\n')
     87