Home | History | Annotate | Download | only in src
      1 #!/usr/bin/python
      2 
      3 import sys
      4 import os.path
      5 
      6 if len (sys.argv) != 3:
      7 	print >>sys.stderr, "usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt"
      8 	sys.exit (1)
      9 
     10 files = [file (x) for x in sys.argv[1:]]
     11 
     12 headers = [[files[0].readline (), files[0].readline ()]]
     13 headers.append (["UnicodeData.txt does not have a header."])
     14 while files[0].readline ().find ('##################') < 0:
     15 	pass
     16 
     17 
     18 def print_joining_table(f):
     19 
     20 	print
     21 	print "static const uint8_t joining_table[] ="
     22 	print "{"
     23 
     24 	min_u = 0x110000
     25 	max_u = 0
     26 	num = 0
     27 	last = -1
     28 	block = ''
     29 	for line in f:
     30 
     31 		if line[0] == '#':
     32 			if line.find (" characters"):
     33 				block = line[2:].strip ()
     34 			continue
     35 
     36 		fields = [x.strip () for x in line.split (';')]
     37 		if len (fields) == 1:
     38 			continue
     39 
     40 		u = int (fields[0], 16)
     41 		if u == 0x200C or u == 0x200D:
     42 			continue
     43 		if u < last:
     44 			raise Exception ("Input data character not sorted", u)
     45 		min_u = min (min_u, u)
     46 		max_u = max (max_u, u)
     47 		num += 1
     48 
     49 		if block:
     50 			print "\n  /* %s */\n" % block
     51 			block = ''
     52 
     53 		if last != -1:
     54 			last += 1
     55 			while last < u:
     56 				print "  JOINING_TYPE_X, /* %04X */" % last
     57 				last += 1
     58 		else:
     59 			last = u
     60 
     61 		if fields[3] in ["ALAPH", "DALATH RISH"]:
     62 			value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
     63 		else:
     64 			value = "JOINING_TYPE_" + fields[2]
     65 		print "  %s, /* %s */" % (value, '; '.join(fields))
     66 
     67 	print
     68 	print "};"
     69 	print
     70 	print "#define JOINING_TABLE_FIRST	0x%04X" % min_u
     71 	print "#define JOINING_TABLE_LAST	0x%04X" % max_u
     72 	print
     73 
     74 	occupancy = num * 100 / (max_u - min_u + 1)
     75 	# Maintain at least 40% occupancy in the table */
     76 	if occupancy < 40:
     77 		raise Exception ("Table too sparse, please investigate: ", occupancy)
     78 
     79 def print_shaping_table(f):
     80 
     81 	shapes = {}
     82 	ligatures = {}
     83 	names = {}
     84 	for line in f:
     85 
     86 		fields = [x.strip () for x in line.split (';')]
     87 		if fields[5][0:1] != '<':
     88 			continue
     89 
     90 		items = fields[5].split (' ')
     91 		shape, items = items[0][1:-1], tuple (int (x, 16) for x in items[1:])
     92 
     93 		if not shape in ['initial', 'medial', 'isolated', 'final']:
     94 			continue
     95 
     96 		c = int (fields[0], 16)
     97 		if len (items) != 1:
     98 			# We only care about lam-alef ligatures
     99 			if len (items) != 2 or items[0] != 0x0644 or items[1] not in [0x0622, 0x0623, 0x0625, 0x0627]:
    100 				continue
    101 
    102 			# Save ligature
    103 			names[c] = fields[1]
    104 			if items not in ligatures:
    105 				ligatures[items] = {}
    106 			ligatures[items][shape] = c
    107 			pass
    108 		else:
    109 			# Save shape
    110 			if items[0] not in names:
    111 				names[items[0]] = fields[1]
    112 			else:
    113 				names[items[0]] = os.path.commonprefix ([names[items[0]], fields[1]]).strip ()
    114 			if items[0] not in shapes:
    115 				shapes[items[0]] = {}
    116 			shapes[items[0]][shape] = c
    117 
    118 	print
    119 	print "static const uint16_t shaping_table[][4] ="
    120 	print "{"
    121 
    122 	keys = shapes.keys ()
    123 	min_u, max_u = min (keys), max (keys)
    124 	for u in range (min_u, max_u + 1):
    125 		s = [shapes[u][shape] if u in shapes and shape in shapes[u] else 0
    126 		     for shape in  ['initial', 'medial', 'final', 'isolated']]
    127 		value = ', '.join ("0x%04X" % c for c in s)
    128 		print "  {%s}, /* U+%04X %s */" % (value, u, names[u] if u in names else "")
    129 
    130 	print "};"
    131 	print
    132 	print "#define SHAPING_TABLE_FIRST	0x%04X" % min_u
    133 	print "#define SHAPING_TABLE_LAST	0x%04X" % max_u
    134 	print
    135 
    136 	ligas = {}
    137 	for pair in ligatures.keys ():
    138 		for shape in ligatures[pair]:
    139 			c = ligatures[pair][shape]
    140 			if shape == 'isolated':
    141 				liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final'])
    142 			elif shape == 'final':
    143 				liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final'])
    144 			else:
    145 				raise Exception ("Unexpected shape", shape)
    146 			if liga[0] not in ligas:
    147 				ligas[liga[0]] = []
    148 			ligas[liga[0]].append ((liga[1], c))
    149 	max_i = max (len (ligas[l]) for l in ligas)
    150 	print
    151 	print "static const struct ligature_set_t {"
    152 	print " uint16_t first;"
    153 	print " struct ligature_pairs_t {"
    154 	print "   uint16_t second;"
    155 	print "   uint16_t ligature;"
    156 	print " } ligatures[%d];" % max_i
    157 	print "} ligature_table[] ="
    158 	print "{"
    159 	keys = ligas.keys ()
    160 	keys.sort ()
    161 	for first in keys:
    162 
    163 		print "  { 0x%04X, {" % (first)
    164 		for liga in ligas[first]:
    165 			print "    { 0x%04X, 0x%04X }, /* %s */" % (liga[0], liga[1], names[liga[1]])
    166 		print "  }},"
    167 
    168 	print "};"
    169 	print
    170 
    171 
    172 
    173 print "/* == Start of generated table == */"
    174 print "/*"
    175 print " * The following table is generated by running:"
    176 print " *"
    177 print " *   ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt"
    178 print " *"
    179 print " * on files with these headers:"
    180 print " *"
    181 for h in headers:
    182 	for l in h:
    183 		print " * %s" % (l.strip())
    184 print " */"
    185 print
    186 print "#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH"
    187 print "#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH"
    188 print
    189 
    190 print_joining_table (files[0])
    191 print_shaping_table (files[1])
    192 
    193 print
    194 print "#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */"
    195 print
    196 print "/* == End of generated table == */"
    197 
    198